// license that can be found in the LICENSE file.
TEXT ·IndexByte(SB),7,$0
- MOVL p+0(FP), SI
- MOVL len+4(FP), CX
- MOVB b+12(FP), AL
+ MOVL s+0(FP), SI
+ MOVL s+4(FP), CX
+ MOVB c+12(FP), AL
MOVL SI, DI
CLD; REPN; SCASB
JZ 3(PC)
- MOVL $-1, ret+16(FP)
+ MOVL $-1, r+16(FP)
RET
SUBL SI, DI
SUBL $1, DI
- MOVL DI, ret+16(FP)
+ MOVL DI, r+16(FP)
RET
TEXT ·Equal(SB),7,$0
- MOVL len+4(FP), BX
- MOVL len1+16(FP), CX
+ MOVL a+4(FP), BX
+ MOVL b+16(FP), CX
MOVL $0, AX
CMPL BX, CX
JNE eqret
- MOVL p+0(FP), SI
- MOVL q+12(FP), DI
+ MOVL a+0(FP), SI
+ MOVL b+12(FP), DI
CLD
REP; CMPSB
JNE eqret
MOVL $1, AX
eqret:
- MOVB AX, ret+24(FP)
+ MOVB AX, r+24(FP)
RET
// license that can be found in the LICENSE file.
TEXT ·IndexByte(SB),7,$0
- MOVQ p+0(FP), SI
- MOVL len+8(FP), BX
- MOVB b+16(FP), AL
+ MOVQ s+0(FP), SI
+ MOVL s+8(FP), BX
+ MOVB c+16(FP), AL
MOVQ SI, DI
CMPL BX, $16
JZ success
failure:
- MOVL $-1, ret+24(FP)
+ MOVL $-1, r+24(FP)
RET
// handle for lengths < 16
MOVL BX, CX
REPN; SCASB
JZ success
- MOVL $-1, ret+24(FP)
+ MOVL $-1, r+24(FP)
RET
// we've found the chunk containing the byte
BSFW DX, DX
SUBQ SI, DI
ADDQ DI, DX
- MOVL DX, ret+24(FP)
+ MOVL DX, r+24(FP)
RET
success:
SUBQ SI, DI
SUBL $1, DI
- MOVL DI, ret+24(FP)
+ MOVL DI, r+24(FP)
RET
TEXT ·Equal(SB),7,$0
- MOVL len+8(FP), BX
- MOVL len1+24(FP), CX
+ MOVL a+8(FP), BX
+ MOVL b+24(FP), CX
MOVL $0, AX
CMPL BX, CX
JNE eqret
- MOVQ p+0(FP), SI
- MOVQ q+16(FP), DI
+ MOVQ a+0(FP), SI
+ MOVQ b+16(FP), DI
CLD
REP; CMPSB
MOVL $1, DX
CMOVLEQ DX, AX
eqret:
- MOVB AX, ret+32(FP)
+ MOVB AX, r+32(FP)
RET
// license that can be found in the LICENSE file.
TEXT ·IndexByte(SB),7,$0
- MOVW base+0(FP), R0
- MOVW len+4(FP), R1
+ MOVW s+0(FP), R0
+ MOVW s+4(FP), R1
MOVBU c+12(FP), R2 // byte to find
MOVW R0, R4 // store base for later
ADD R0, R1 // end
SUB $1, R0 // R0 will be one beyond the position we want
SUB R4, R0 // remove base
- MOVW R0, index+16(FP)
+ MOVW R0, r+16(FP)
RET
_notfound:
MOVW $-1, R0
- MOVW R0, index+16(FP)
+ MOVW R0, r+16(FP)
RET
TEXT ·Equal(SB),7,$0
- MOVW alen+4(FP), R1
- MOVW blen+16(FP), R3
+ MOVW a+4(FP), R1
+ MOVW b+16(FP), R3
CMP R1, R3 // unequal lengths are not equal
B.NE _notequal
- MOVW aptr+0(FP), R0
- MOVW bptr+12(FP), R2
+ MOVW a+0(FP), R0
+ MOVW b+12(FP), R2
ADD R0, R1 // end
_next:
_notequal:
MOVW $0, R0
- MOVW R0, equal+24(FP)
+ MOVBU R0, r+24(FP)
RET
_equal:
MOVW $1, R0
- MOVW R0, equal+24(FP)
+ MOVBU R0, r+24(FP)
RET
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
// instruction.
-func castagnoliSSE42(uint32, []byte) uint32
+func castagnoliSSE42(crc uint32, p []byte) uint32
var sse42 = haveSSE42()
done:
NOTL AX
- MOVL AX, ret+24(FP)
+ MOVL AX, r+24(FP)
RET
// func haveSSE42() bool
CPUID
SHRQ $20, CX
ANDQ $1, CX
- MOVB CX, ret+0(FP)
+ MOVB CX, r+0(FP)
RET
// license that can be found in the LICENSE file.
TEXT ·Abs(SB),7,$0
- MOVW lo+0(FP), R0
- MOVW hi+4(FP), R1
+ MOVW x+0(FP), R0
+ MOVW x+4(FP), R1
AND $((1<<31)-1), R1
- MOVW R0, resultlo+8(FP)
- MOVW R1, resulthi+12(FP)
+ MOVW R0, r+8(FP)
+ MOVW R1, r+12(FP)
RET
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E1
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E2
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
JMP E3
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
JMP E4
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVL n+4(FP), BX // i = n
+ MOVL z+4(FP), BX // i = z
SUBL $1, BX // i--
JL X8b // i < 0 (n <= 0)
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
SUBL $1, BP // n--
JL X9b // n < 0 (n <= 0)
MOVL x+12(FP), SI
MOVL y+24(FP), BP
MOVL r+28(FP), CX // c = r
- MOVL n+4(FP), BX
+ MOVL z+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), BP
- MOVL n+4(FP), BX
+ MOVL z+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
MOVL xn+12(FP), DX // r = xn
MOVL x+16(FP), SI
MOVL y+28(FP), CX
- MOVL n+4(FP), BX // i = n
+ MOVL z+4(FP), BX // i = z
JMP E7
L7: MOVL (SI)(BX*4), AX
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
- MOVL n+8(FP), DI
+ MOVL z+8(FP), DI
MOVQ x+16(FP), R8
MOVQ y+32(FP), R9
MOVQ z+0(FP), R10
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBQ instead of ADCQ and label names)
TEXT ·subVV(SB),7,$0
- MOVL n+8(FP), DI
+ MOVL z+8(FP), DI
MOVQ x+16(FP), R8
MOVQ y+32(FP), R9
MOVQ z+0(FP), R10
// func addVW(z, x []Word, y Word) (c Word)
TEXT ·addVW(SB),7,$0
- MOVL n+8(FP), DI
+ MOVL z+8(FP), DI
MOVQ x+16(FP), R8
MOVQ y+32(FP), CX // c = y
MOVQ z+0(FP), R10
// func subVW(z, x []Word, y Word) (c Word)
// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
TEXT ·subVW(SB),7,$0
- MOVL n+8(FP), DI
+ MOVL z+8(FP), DI
MOVQ x+16(FP), R8
MOVQ y+32(FP), CX // c = y
MOVQ z+0(FP), R10
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVL n+8(FP), BX // i = n
+ MOVL z+8(FP), BX // i = z
SUBL $1, BX // i--
JL X8b // i < 0 (n <= 0)
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVL n+8(FP), R11
+ MOVL z+8(FP), R11
SUBL $1, R11 // n--
JL X9b // n < 0 (n <= 0)
MOVQ x+16(FP), R8
MOVQ y+32(FP), R9
MOVQ r+40(FP), CX // c = r
- MOVL n+8(FP), R11
+ MOVL z+8(FP), R11
MOVQ $0, BX // i = 0
JMP E5
MOVQ z+0(FP), R10
MOVQ x+16(FP), R8
MOVQ y+32(FP), R9
- MOVL n+8(FP), R11
+ MOVL z+8(FP), R11
MOVQ $0, BX // i = 0
MOVQ $0, CX // c = 0
JMP E6
MOVQ xn+16(FP), DX // r = xn
MOVQ x+24(FP), R8
MOVQ y+40(FP), R9
- MOVL n+8(FP), BX // i = n
+ MOVL z+8(FP), BX // i = z
JMP E7
L7: MOVQ (R8)(BX*8), AX
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
B E1
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
B E2
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
CMP R1, R4
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
CMP R1, R4
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
CMP $0, R5
BEQ X7
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
CMP $0, R5
BEQ X6
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW r+28(FP), R4
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5
B E8
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5
MOVW $0, R4
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Frexp(x float64) (f float64, e int)
+// func Frexp(f float64) (frac float64, exp int)
TEXT ·Frexp(SB),7,$0
- FMOVD x+0(FP), F0 // F0=x
+ FMOVD f+0(FP), F0 // F0=f
FXAM
FSTSW AX
SAHF
JCS nan_zero_inf
FXTRACT // F0=f (0<=f<1), F1=e
FMULD $(0.5), F0 // F0=f (0.5<=f<1), F1=e
- FMOVDP F0, f+8(FP) // F0=e
+ FMOVDP F0, frac+8(FP) // F0=e
FLD1 // F0=1, F1=e
FADDDP F0, F1 // F0=e+1
- FMOVLP F0, e+16(FP) // (int=int32)
+ FMOVLP F0, exp+16(FP) // (int=int32)
RET
nan_zero_inf:
- FMOVDP F0, f+8(FP) // F0=e
- MOVL $0, e+16(FP) // e=0
+ FMOVDP F0, frac+8(FP) // F0=e
+ MOVL $0, exp+16(FP) // exp=0
RET
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Hypot(x, y float64) float64
+// func Hypot(p, q float64) float64
TEXT ·Hypot(SB),7,$0
// test bits for not-finite
- MOVL xh+4(FP), AX // high word x
+ MOVL p+4(FP), AX // high word p
ANDL $0x7ff00000, AX
CMPL AX, $0x7ff00000
JEQ not_finite
- MOVL yh+12(FP), AX // high word y
+ MOVL q+12(FP), AX // high word q
ANDL $0x7ff00000, AX
CMPL AX, $0x7ff00000
JEQ not_finite
- FMOVD x+0(FP), F0 // F0=x
- FABS // F0=|x|
- FMOVD y+8(FP), F0 // F0=y, F1=|x|
- FABS // F0=|y|, F1=|x|
+ FMOVD p+0(FP), F0 // F0=p
+ FABS // F0=|p|
+ FMOVD q+8(FP), F0 // F0=q, F1=|p|
+ FABS // F0=|q|, F1=|p|
FUCOMI F0, F1 // compare F0 to F1
JCC 2(PC) // jump if F0 >= F1
- FXCHD F0, F1 // F0=|x| (larger), F1=|y| (smaller)
+ FXCHD F0, F1 // F0=|p| (larger), F1=|q| (smaller)
FTST // compare F0 to 0
FSTSW AX
ANDW $0x4000, AX
JNE 10(PC) // jump if F0 = 0
- FXCHD F0, F1 // F0=y (smaller), F1=x (larger)
- FDIVD F1, F0 // F0=y(=y/x), F1=x
- FMULD F0, F0 // F0=y*y, F1=x
- FLD1 // F0=1, F1=y*y, F2=x
- FADDDP F0, F1 // F0=1+y*y, F1=x
- FSQRT // F0=sqrt(1+y*y), F1=x
- FMULDP F0, F1 // F0=x*sqrt(1+y*y)
+ FXCHD F0, F1 // F0=q (smaller), F1=p (larger)
+ FDIVD F1, F0 // F0=q(=q/p), F1=p
+ FMULD F0, F0 // F0=q*q, F1=p
+ FLD1 // F0=1, F1=q*q, F2=p
+ FADDDP F0, F1 // F0=1+q*q, F1=p
+ FSQRT // F0=sqrt(1+q*q), F1=p
+ FMULDP F0, F1 // F0=p*sqrt(1+q*q)
FMOVDP F0, r+16(FP)
RET
FMOVDP F0, F1 // F0=0
RET
not_finite:
// test bits for -Inf or +Inf
- MOVL xh+4(FP), AX // high word x
- ORL xl+0(FP), AX // low word x
+ MOVL p+4(FP), AX // high word p
+ ORL p+0(FP), AX // low word p
ANDL $0x7fffffff, AX
CMPL AX, $0x7ff00000
JEQ is_inf
- MOVL yh+12(FP), AX // high word y
- ORL yl+8(FP), AX // low word y
+ MOVL q+12(FP), AX // high word q
+ ORL q+8(FP), AX // low word q
ANDL $0x7fffffff, AX
CMPL AX, $0x7ff00000
JEQ is_inf
- MOVL $0x7ff80000, rh+20(FP) // return NaN = 0x7FF8000000000001
- MOVL $0x00000001, rl+16(FP)
+ MOVL $0x7ff80000, r+20(FP) // return NaN = 0x7FF8000000000001
+ MOVL $0x00000001, r+16(FP)
RET
is_inf:
- MOVL AX, rh+20(FP) // return +Inf = 0x7FF0000000000000
- MOVL $0x00000000, rl+16(FP)
+ MOVL AX, r+20(FP) // return +Inf = 0x7FF0000000000000
+ MOVL $0x00000000, r+16(FP)
RET
#define PosInf 0x7FF0000000000000
#define NaN 0x7FF8000000000001
-// func Hypot(x, y float64) float64
+// func Hypot(p, q float64) float64
TEXT ·Hypot(SB),7,$0
// test bits for special cases
- MOVQ x+0(FP), BX
+ MOVQ p+0(FP), BX
MOVQ $~(1<<63), AX
- ANDQ AX, BX // x = |x|
- MOVQ y+8(FP), CX
- ANDQ AX, CX // y = |y|
+ ANDQ AX, BX // p = |p|
+ MOVQ q+8(FP), CX
+ ANDQ AX, CX // q = |q|
MOVQ $PosInf, AX
CMPQ AX, BX
JLE isInfOrNaN
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Ldexp(f float64, e int) float64
+// func Ldexp(frac float64, exp int) float64
TEXT ·Ldexp(SB),7,$0
- FMOVL e+8(FP), F0 // F0=e
- FMOVD x+0(FP), F0 // F0=x, F1=e
+ FMOVL exp+8(FP), F0 // F0=exp
+ FMOVD frac+0(FP), F0 // F0=frac, F1=e
FSCALE // F0=x*2**e, F1=e
FMOVDP F0, F1 // F0=x*2**e
FMOVDP F0, r+12(FP)
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Modf(x float64) (int float64, frac float64)
+// func Modf(f float64) (int float64, frac float64)
TEXT ·Modf(SB),7,$0
- FMOVD x+0(FP), F0 // F0=x
- FMOVD F0, F1 // F0=x, F1=x
+ FMOVD f+0(FP), F0 // F0=f
+ FMOVD F0, F1 // F0=f, F1=f
FSTCW -2(SP) // save old Control Word
MOVW -2(SP), AX
ORW $0x0c00, AX // Rounding Control set to truncate
MOVW AX, -4(SP) // store new Control Word
FLDCW -4(SP) // load new Control Word
- FRNDINT // F0=trunc(x), F1=x
+ FRNDINT // F0=trunc(f), F1=f
FLDCW -2(SP) // load old Control Word
- FSUBD F0, F1 // F0=trunc(x), F1=x-trunc(x)
- FMOVDP F0, i+8(FP) // F0=x-trunc(x)
- FMOVDP F0, f+16(FP)
+ FSUBD F0, F1 // F0=trunc(f), F1=f-trunc(f)
+ FMOVDP F0, int+8(FP) // F0=f-trunc(f)
+ FMOVDP F0, frac+16(FP)
RET
FSTSW AX // AX=status word
ANDW $0x0400, AX
JNE 4(PC) // jump if x outside range
- FMOVDP F0, c+16(FP) // F0=sin(x)
- FMOVDP F0, s+8(FP)
+ FMOVDP F0, cos+16(FP) // F0=sin(x)
+ FMOVDP F0, sin+8(FP)
RET
FLDPI // F0=Pi, F1=x
FADDD F0, F0 // F0=2*Pi, F1=x
JNE -3(PC) // jump if reduction incomplete
FMOVDP F0, F1 // F0=reduced_x
FSINCOS // F0=cos(reduced_x), F1=sin(reduced_x)
- FMOVDP F0, c+16(FP) // F0=sin(reduced_x)
- FMOVDP F0, s+8(FP)
+ FMOVDP F0, cos+16(FP) // F0=sin(reduced_x)
+ FMOVDP F0, sin+8(FP)
RET
// CMPXCHGL was introduced on the 486.
LOCK
CMPXCHGL CX, 0(BP)
- SETEQ ret+12(FP)
+ SETEQ swapped+12(FP)
RET
TEXT ·CompareAndSwapUintptr(SB),7,$0
TEXT ·CompareAndSwapUint64(SB),7,$0
MOVL addr+0(FP), BP
- MOVL oldlo+4(FP), AX
- MOVL oldhi+8(FP), DX
- MOVL newlo+12(FP), BX
- MOVL newhi+16(FP), CX
+ MOVL old+4(FP), AX
+ MOVL old+8(FP), DX
+ MOVL new+12(FP), BX
+ MOVL new+16(FP), CX
// CMPXCHG8B was introduced on the Pentium.
LOCK
CMPXCHG8B 0(BP)
- SETEQ ret+20(FP)
+ SETEQ swapped+20(FP)
RET
TEXT ·AddInt32(SB),7,$0
LOCK
XADDL AX, 0(BP)
ADDL AX, CX
- MOVL CX, ret+8(FP)
+ MOVL CX, new+8(FP)
RET
TEXT ·AddUintptr(SB),7,$0
// no XADDQ so use CMPXCHG8B loop
MOVL addr+0(FP), BP
// DI:SI = delta
- MOVL deltalo+4(FP), SI
- MOVL deltahi+8(FP), DI
+ MOVL delta+4(FP), SI
+ MOVL delta+8(FP), DI
// DX:AX = *addr
MOVL 0(BP), AX
MOVL 4(BP), DX
// success
// return CX:BX
- MOVL BX, retlo+12(FP)
- MOVL CX, rethi+16(FP)
+ MOVL BX, new+12(FP)
+ MOVL CX, new+16(FP)
RET
TEXT ·LoadInt32(SB),7,$0
TEXT ·LoadUint32(SB),7,$0
MOVL addr+0(FP), AX
MOVL 0(AX), AX
- MOVL AX, ret+4(FP)
+ MOVL AX, val+4(FP)
RET
TEXT ·LoadInt64(SB),7,$0
MOVL new+12(FP), CX
LOCK
CMPXCHGL CX, 0(BP)
- SETEQ ret+16(FP)
+ SETEQ swapped+16(FP)
RET
TEXT ·CompareAndSwapUintptr(SB),7,$0
MOVQ new+16(FP), CX
LOCK
CMPXCHGQ CX, 0(BP)
- SETEQ ret+24(FP)
+ SETEQ swapped+24(FP)
RET
TEXT ·AddInt32(SB),7,$0
LOCK
XADDL AX, 0(BP)
ADDL AX, CX
- MOVL CX, ret+16(FP)
+ MOVL CX, new+16(FP)
RET
TEXT ·AddUintptr(SB),7,$0
LOCK
XADDQ AX, 0(BP)
ADDQ AX, CX
- MOVQ CX, ret+16(FP)
+ MOVQ CX, new+16(FP)
RET
TEXT ·LoadInt32(SB),7,$0
TEXT ·LoadUint32(SB),7,$0
MOVQ addr+0(FP), AX
MOVL 0(AX), AX
- MOVL AX, ret+8(FP)
+ MOVL AX, val+8(FP)
RET
TEXT ·LoadInt64(SB),7,$0
TEXT ·LoadUint64(SB),7,$0
MOVQ addr+0(FP), AX
MOVQ 0(AX), AX
- MOVQ AX, ret+8(FP)
+ MOVQ AX, val+8(FP)
RET
TEXT ·LoadUintptr(SB),7,$0
TEXT ·LoadPointer(SB),7,$0
MOVQ addr+0(FP), AX
MOVQ 0(AX), AX
- MOVQ AX, ret+8(FP)
+ MOVQ AX, val+8(FP)
RET
TEXT ·StoreInt32(SB),7,$0