// LTYPEM spec6 { outcode(int($1), &$2); }
MOVL AX, BX
MOVL $4, BX
-
+
// LTYPEI spec7 { outcode(int($1), &$2); }
IMULL AX
IMULL $4, CX
// ftsqrt BF, FRB
FTSQRT F2,$7
-// FCFID
+// FCFID
// FCFIDS
FCFID F2,F3
ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9);
ROUND2(CX,DX,AX,BX,12,0x676f02d9,14);
ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20);
-
+
MOVL (5*4)(SI), DI
MOVL CX, BP
ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9);
ROUND2(CX,DX,AX,BX,12,0x676f02d9,14);
ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20);
-
+
MOVL (5*4)(SI), R8
MOVL CX, R9
ROLL $shift, a; \
XORL c, R9; \
ADDL b, a
-
+
ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6);
ROUND4(DX,AX,BX,CX,14,0x432aff97,10);
ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15);
ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9);
ROUND2(CX,DX,AX,BX,12,0x676f02d9,14);
ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20);
-
+
MOVL (5*4)(SI), R8
MOVL CX, R9
ROLL $shift, a; \
XORL c, R9; \
ADDL b, a
-
+
ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6);
ROUND4(DX,AX,BX,CX,14,0x432aff97,10);
ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15);
MOVL p_len+8(FP), DX
SHRL $6, DX
SHLL $6, DX
-
+
LEAL (SI)(DX*1), DI
MOVL (0*4)(BP), AX
MOVL (1*4)(BP), BX
ROUND1x(DX, BP, AX, BX, CX, 17)
ROUND1x(CX, DX, BP, AX, BX, 18)
ROUND1x(BX, CX, DX, BP, AX, 19)
-
+
ROUND2(AX, BX, CX, DX, BP, 20)
ROUND2(BP, AX, BX, CX, DX, 21)
ROUND2(DX, BP, AX, BX, CX, 22)
ROUND2(DX, BP, AX, BX, CX, 37)
ROUND2(CX, DX, BP, AX, BX, 38)
ROUND2(BX, CX, DX, BP, AX, 39)
-
+
ROUND3(AX, BX, CX, DX, BP, 40)
ROUND3(BP, AX, BX, CX, DX, 41)
ROUND3(DX, BP, AX, BX, CX, 42)
ROUND3(DX, BP, AX, BX, CX, 57)
ROUND3(CX, DX, BP, AX, BX, 58)
ROUND3(BX, CX, DX, BP, AX, 59)
-
+
ROUND4(AX, BX, CX, DX, BP, 60)
ROUND4(BP, AX, BX, CX, DX, 61)
ROUND4(DX, BP, AX, BX, CX, 62)
MOVL b, R9; \
ANDL c, R9; \
ORL R8, R9
-
+
#define FUNC4 FUNC2
#define MIX(a, b, c, d, e, const) \
MOVQ p_len+16(FP), DX
SHRQ $6, DX
SHLQ $6, DX
-
+
LEAQ (SI)(DX*1), DI
MOVL (0*4)(BP), AX
MOVL (1*4)(BP), BX
ROUND1x(DX, BP, AX, BX, CX, 17)
ROUND1x(CX, DX, BP, AX, BX, 18)
ROUND1x(BX, CX, DX, BP, AX, 19)
-
+
ROUND2(AX, BX, CX, DX, BP, 20)
ROUND2(BP, AX, BX, CX, DX, 21)
ROUND2(DX, BP, AX, BX, CX, 22)
ROUND2(DX, BP, AX, BX, CX, 37)
ROUND2(CX, DX, BP, AX, BX, 38)
ROUND2(BX, CX, DX, BP, AX, 39)
-
+
ROUND3(AX, BX, CX, DX, BP, 40)
ROUND3(BP, AX, BX, CX, DX, 41)
ROUND3(DX, BP, AX, BX, CX, 42)
ROUND3(DX, BP, AX, BX, CX, 57)
ROUND3(CX, DX, BP, AX, BX, 58)
ROUND3(BX, CX, DX, BP, AX, 59)
-
+
ROUND4(AX, BX, CX, DX, BP, 60)
ROUND4(BP, AX, BX, CX, DX, 61)
ROUND4(DX, BP, AX, BX, CX, 62)
MOVL b, R9; \
ANDL c, R9; \
ORL R8, R9
-
+
#define FUNC4 FUNC2
#define MIX(a, b, c, d, e, const) \
MOVL p_len+8(FP), DX
SHRQ $6, DX
SHLQ $6, DX
-
+
LEAQ (SI)(DX*1), DI
MOVL (0*4)(R14), AX
MOVL (1*4)(R14), BX
ROUND1x(DX, BP, AX, BX, CX, 17)
ROUND1x(CX, DX, BP, AX, BX, 18)
ROUND1x(BX, CX, DX, BP, AX, 19)
-
+
ROUND2(AX, BX, CX, DX, BP, 20)
ROUND2(BP, AX, BX, CX, DX, 21)
ROUND2(DX, BP, AX, BX, CX, 22)
ROUND2(DX, BP, AX, BX, CX, 37)
ROUND2(CX, DX, BP, AX, BX, 38)
ROUND2(BX, CX, DX, BP, AX, 39)
-
+
ROUND3(AX, BX, CX, DX, BP, 40)
ROUND3(BP, AX, BX, CX, DX, 41)
ROUND3(DX, BP, AX, BX, CX, 42)
ROUND3(DX, BP, AX, BX, CX, 57)
ROUND3(CX, DX, BP, AX, BX, 58)
ROUND3(BX, CX, DX, BP, AX, 59)
-
+
ROUND4(AX, BX, CX, DX, BP, 60)
ROUND4(BP, AX, BX, CX, DX, 61)
ROUND4(DX, BP, AX, BX, CX, 62)
ORR Rt1<<24, Rt0, Rt0 ; \
MOVW.P Rt0, 4(Rw) ; \
ADD Rt0, Re, Re
-
+
// tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
// w[i&0xf] = tmp<<1 | tmp>>(32-1)
- // e += w[i&0xf]
+ // e += w[i&0xf]
#define SHUFFLE(Re) \
MOVW (-16*4)(Rw), Rt0 ; \
MOVW (-14*4)(Rw), Rt1 ; \
ROUND1x(Rd, Re, Ra, Rb, Rc)
ROUND1x(Rc, Rd, Re, Ra, Rb)
ROUND1x(Rb, Rc, Rd, Re, Ra)
-
+
MOVW $0x6ED9EBA1, Rconst
MOVW $4, Rctr
loop2: ROUND2(Ra, Rb, Rc, Rd, Re)
ROUND2(Rb, Rc, Rd, Re, Ra)
SUB.S $1, Rctr
BNE loop2
-
+
MOVW $0x8F1BBCDC, Rconst
MOVW $4, Rctr
loop3: ROUND3(Ra, Rb, Rc, Rd, Re)
ROUND3(Rb, Rc, Rd, Re, Ra)
SUB.S $1, Rctr
BNE loop3
-
+
MOVW $0xCA62C1D6, Rconst
MOVW $4, Rctr
loop4: ROUND4(Ra, Rb, Rc, Rd, Re)
ADDQ $16, DI
SUBQ $16, R8
JMP loop
-
+
diff64:
ADDQ $48, SI
ADDQ $48, DI
ADDQ $16, DI
SUBQ $16, R8
JMP loop
-
+
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
JB bigloop
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE hugeloop_avx2
-
+
// 64 bytes at a time using xmm registers
hugeloop:
CMPQ BX, $64
CMPQ BX, $8
JB small
-
+
// 64 bytes at a time using xmm registers
hugeloop:
CMPQ BX, $64
TEXT ·Equal(SB),NOSPLIT,$0-25
MOVW a_len+4(FP), R1
MOVW b_len+16(FP), R3
-
+
CMP R1, R3 // unequal lengths are not equal
B.NE notequal
SHRL CX, DX:AX // w>>s | w1<<ŝ
MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ
ADDL $1, BX // i++
-
+
E9: CMPL BX, BP
JL L9 // i < n-1
MOVQ r+56(FP), CX // c = r
MOVQ z_len+8(FP), R11
MOVQ $0, BX // i = 0
-
+
CMPQ R11, $4
JL E5
-
+
U5: // i+4 <= n
// regular loop body unrolled 4x
MOVQ (0*8)(R8)(BX*8), AX
MOVQ AX, (3*8)(R10)(BX*8)
MOVQ DX, CX
ADDQ $4, BX // i += 4
-
+
LEAQ 4(BX), DX
CMPQ DX, R11
JLE U5
MOVW z_len+4(FP), R5
TEQ $0, R5
BEQ X7
-
+
MOVW z+0(FP), R1
MOVW x+12(FP), R2
ADD R5<<2, R2, R2
MOVW $32, R4
SUB R3, R4
MOVW $0, R7
-
+
MOVW.W -4(R2), R6
MOVW R6<<R3, R7
MOVW R6>>R4, R6
// addVV is an indirect-dispatch stub: it loads the code address currently
// stored in the addvectorfacility variable and branches to it.
// NOTE(review): presumably addvectorfacility is initialised to (and later
// updated by) a *_check routine that tests ·hasVX — confirm against the
// DATA/GLOBL declarations, which are not visible here.
TEXT ·addVV(SB),NOSPLIT,$0
MOVD addvectorfacility+0x00(SB),R1 // R1 = target address held in addvectorfacility
BR (R1) // unconditional branch to that address; control does not fall through
-
+
TEXT ·addVV_check(SB),NOSPLIT, $0
MOVB ·hasVX(SB), R1
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
BLT v1
SUB $12, R3 // n -= 16
BLT A1 // if n < 0 goto A1
-
+
MOVD R8, R5
MOVD R9, R6
MOVD R2, R7
// subVV is an indirect-dispatch stub: it loads the code address currently
// stored in the subvectorfacility variable and branches to it.
// NOTE(review): presumably subvectorfacility is initialised to (and later
// updated by) a *_check routine that tests ·hasVX — confirm against the
// DATA/GLOBL declarations, which are not visible here.
TEXT ·subVV(SB),NOSPLIT,$0
MOVD subvectorfacility+0x00(SB),R1 // R1 = target address held in subvectorfacility
BR (R1) // unconditional branch to that address; control does not fall through
-
+
TEXT ·subVV_check(SB),NOSPLIT,$0
MOVB ·hasVX(SB), R1
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
MOVD $0, R4 // c = 0
MOVD $0, R0 // make sure it's zero
MOVD $0, R10 // i = 0
-
+
// s/JL/JMP/ below to disable the unrolled loop
SUB $4, R3 // n -= 4
BLT v1 // if n < 0 goto v1
A1: ADD $12, R3 // n += 16
BLT v1 // if n < 0 goto v1
-
+
U1: // n >= 0
// regular loop body unrolled 4x
MOVD 0(R8)(R10*1), R5
// addVW is an indirect-dispatch stub: it loads the code address currently
// stored in the addwvectorfacility variable and branches to it.
// NOTE(review): presumably addwvectorfacility is initialised to (and later
// updated by) a *_check routine that tests ·hasVX — confirm against the
// DATA/GLOBL declarations, which are not visible here.
TEXT ·addVW(SB),NOSPLIT,$0
MOVD addwvectorfacility+0x00(SB),R1 // R1 = target address held in addwvectorfacility
BR (R1) // unconditional branch to that address; control does not fall through
-
+
TEXT ·addVW_check(SB),NOSPLIT,$0
MOVB ·hasVX(SB), R1
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
// subVW is an indirect-dispatch stub: it loads the code address currently
// stored in the subwvectorfacility variable and branches to it.
// NOTE(review): presumably subwvectorfacility is initialised to (and later
// updated by) a *_check routine that tests ·hasVX — confirm against the
// DATA/GLOBL declarations, which are not visible here.
TEXT ·subVW(SB),NOSPLIT,$0
MOVD subwvectorfacility+0x00(SB),R1 // R1 = target address held in subwvectorfacility
BR (R1) // unconditional branch to that address; control does not fall through
-
+
TEXT ·subVW_check(SB),NOSPLIT,$0
MOVB ·hasVX(SB), R1
CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported
TEXT ·Expm1(SB),NOSPLIT,$0
FLDLN2 // F0=log(2) = 1/log2(e) ~ 0.693147
FMOVD x+0(FP), F0 // F0=x, F1=1/log2(e)
- FABS // F0=|x|, F1=1/log2(e)
+ FABS // F0=|x|, F1=1/log2(e)
FUCOMPP F0, F1 // compare F0 to F1
FSTSW AX
SAHF
FSCALE // F0=e**x, F1=int(x*log2(e))
FMOVDP F0, F1 // F0=e**x
FLD1 // F0=1, F1=e**x
- FSUBDP F0, F1 // F0=e**x-1
+ FSUBDP F0, F1 // F0=e**x-1
FMOVDP F0, ret+8(FP)
RET
not_finite:
FCOS // F0=cos(reduced_x)
FMOVDP F0, ret+8(FP)
RET
-
+
// func Sin(x float64) float64
TEXT ·Sin(SB),NOSPLIT,$0
FMOVD x+0(FP), F0 // F0=x
#include "textflag.h"
-// func Sqrt(x float64) float64
+// func Sqrt(x float64) float64
TEXT ·Sqrt(SB),NOSPLIT,$0
FMOVD x+0(FP),F0
FSQRT
#include "textflag.h"
-// func Sqrt(x float64) float64
+// func Sqrt(x float64) float64
TEXT ·Sqrt(SB),NOSPLIT,$0
MOVB runtime·goarm(SB), R11
CMP $5, R11
MOVL BX, g_stackguard1(BP)
MOVL BX, (g_stack+stack_lo)(BP)
MOVL SP, (g_stack+stack_hi)(BP)
-
+
// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
JMP has_cpuid
MOVL (g_sched+gobuf_sp)(SI), SP
MOVL 0(SP), AX
MOVL AX, (g_sched+gobuf_sp)(SI)
-
+
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
CMPL DX, $0
CMPL BX, $64
JBE aes33to64
JMP aes65plus
-
+
aes0to15:
TESTL BX, BX
JE aes0
ADDL BX, BX
PAND masks<>(SB)(BX*8), X1
-final1:
+final1:
AESENC X0, X1 // scramble input, xor in seed
AESENC X1, X1 // scramble combo 2 times
AESENC X1, X1
// make second starting seed
PXOR runtime·aeskeysched+16(SB), X1
AESENC X1, X1
-
+
// load data to be hashed
MOVOU (AX), X2
MOVOU -16(AX)(BX*1), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
-
+
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(BX*1), X6
MOVOU -16(AX)(BX*1), X7
-
+
AESENC X0, X4
AESENC X1, X5
AESENC X2, X6
AESENC X3, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
-
+
// start with last (possibly overlapping) block
MOVOU -64(AX)(BX*1), X4
MOVOU -48(AX)(BX*1), X5
// compute number of remaining 64-byte blocks
DECL BX
SHRL $6, BX
-
+
aesloop:
// scramble state, xor in a block
MOVOU (AX), X0
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000
-
+
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000
-
+
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000
-
+
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000
-
+
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000
-
+
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000
-
+
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000
-
+
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000
-
+
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000
-
+
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000
-
+
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000
-
+
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000
-
+
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000
-
+
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff
-
+
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff
-
+
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000
-
+
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff
-
+
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff
-
+
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff
-
+
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff
-
+
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f
-
+
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e
-
+
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
ANDQ $~15, SP
MOVQ AX, 16(SP)
MOVQ BX, 24(SP)
-
+
// create istack out of the given (operating system) stack.
// _cgo_init may update stackguard.
MOVQ $runtime·g0(SB), DI
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
MOVQ fn+0(FP), DI
-
+
get_tls(CX)
MOVQ g(CX), AX // save state in g->sched
MOVQ 0(SP), BX // caller's PC
MOVQ m_gsignal(R8), SI
CMPQ SI, DI
JEQ nosave
-
+
// Switch to system stack.
MOVQ m_g0(R8), SI
CALL gosave<>(SB)
get_tls(CX)
MOVQ g(CX), BX
MOVQ g_m(BX), BX
-
+
// Set m->sched.sp = SP, so that if a panic happens
// during the function we are about to execute, it will
// have a valid SP to run on the g0 stack.
MOVQ (g_sched+gobuf_sp)(SI), SP
MOVQ 0(SP), AX
MOVQ AX, (g_sched+gobuf_sp)(SI)
-
+
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
CMPQ R8, $0
// make second starting seed
PXOR runtime·aeskeysched+16(SB), X1
AESENC X1, X1
-
+
// load data to be hashed
MOVOU (AX), X2
MOVOU -16(AX)(CX*1), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
-
+
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(CX*1), X6
PXOR X1, X5
PXOR X2, X6
PXOR X3, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
-
+
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
-
+
// start with last (possibly overlapping) block
MOVOU -128(AX)(CX*1), X8
MOVOU -112(AX)(CX*1), X9
PXOR X5, X13
PXOR X6, X14
PXOR X7, X15
-
+
// compute number of remaining 128-byte blocks
DECQ CX
SHRQ $7, CX
-
+
aesloop:
// scramble state
AESENC X8, X8
MOVL AX, 16(SP)
MOVL BX, 24(SP)
-
+
// create istack out of the given (operating system) stack.
MOVL $runtime·g0(SB), DI
LEAL (-64*1024+104)(SP), BX
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
MOVL fn+0(FP), DI
-
+
get_tls(CX)
MOVL g(CX), AX // save state in g->sched
MOVL 0(SP), BX // caller's PC
MOVW R1, 12(R13)
MOVW R2, 16(R13)
BL runtime·reflectcallmove(SB)
- RET
+ RET
CALLFN(·call16, 16)
CALLFN(·call32, 32)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-16
NO_LOCAL_POINTERS
-
+
// Load m and g from thread-local storage.
MOVB runtime·iscgo(SB), R0
CMP $0, R0
// callee-save in the gcc calling convention, so save them here.
MOVW R11, saveR11-4(SP)
MOVW g, saveG-8(SP)
-
+
BL runtime·load_g(SB)
MOVW g_m(g), R0
MOVW m_curg(R0), R0
MOVW (g_stack+stack_hi)(R0), R0
-
+
MOVW saveG-8(SP), g
MOVW saveR11-4(SP), R11
RET
MOVQ DX, 0x0(SP) /* arg */
MOVQ R8, 0x8(SP) /* argsize (includes padding) */
MOVQ R9, 0x10(SP) /* ctxt */
-
+
CALL CX /* fn */
-
+
MOVQ 0x48(SP), DI
MOVQ 0x50(SP), SI
MOVUPS 0x60(SP), X6
MOVQ 0x30(SP), R13
MOVQ 0x38(SP), R14
MOVQ 0x40(SP), R15
-
+
RET
// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr)
// Saves C callee-saved registers and calls fn with three arguments.
TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
- /*
+ /*
* We still need to save all callee save register as before, and then
* push 3 args for fn (R1, R2, R3).
* Also note that at procedure entry in gc world, 4(R13) will be the
// Build a 32-byte stack frame for us for this call.
// Saved LR (none available) is at the bottom,
- // then the PC argument for setsigsegv,
+ // then the PC argument for setsigsegv,
// then a copy of the LR for us to restore.
MOVD.W $0, -32(RSP)
MOVD R1, 8(RSP)
ADDL BX, DI
ADDL BX, SI
STD
-
+
MOVL BX, CX
SHRL $2, CX
ANDL $3, BX
CMP TMP, TS
BHS _f4tail
- MOVM.IA.W (FROM), [R1-R8]
+ MOVM.IA.W (FROM), [R1-R8]
MOVM.IA.W [R1-R8], (TS)
B _f32loop
/*
* forward copy loop
*/
-forward:
+forward:
MOVL BX, CX
SHRL $2, CX
ANDL $3, BX
ADDQ BX, CX
CMPQ CX, DI
JLS forward
-
+
/*
* whole thing backwards has
* adjusted addresses
MOVW CR, R0 // Save CR in caller's frame
MOVD R0, 8(R1)
MOVDU R1, -320(R1) // Allocate frame.
-
+
// Preserve callee-save registers.
MOVD R14, 24(R1)
MOVD R15, 32(R1)
// 8(DI) - argc
// 12(DI) - argv, then 0, then envv, then 0, then auxv
// NaCl entry here is almost the same, except that there
-// is no saved caller PC, so 0(FP) is -8(FP) and so on.
+// is no saved caller PC, so 0(FP) is -8(FP) and so on.
TEXT _rt0_amd64p32_nacl(SB),NOSPLIT,$16
MOVL DI, 0(SP)
CALL runtime·nacl_sysinfo(SB)
#include "go_asm.h"
#include "go_tls.h"
#include "textflag.h"
-
+
TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$0
MOVQ addr+0(FP), DI // arg 1 - ptr
MOVL val+8(FP), SI // arg 2 - value
#include "go_asm.h"
#include "go_tls.h"
#include "textflag.h"
-
+
TEXT runtime·sys_umtx_op(SB),NOSPLIT,$-4
MOVL $454, AX
INT $0x80
POPAL
get_tls(CX)
MOVL BX, g(CX)
-
+
MOVL AX, g_m(BX)
CALL runtime·stackcheck(SB) // smashes AX
CALL runtime·mstart(SB)
// Same as in sys_darwin_386.s:/ugliness, different constant.
// DI currently holds m->tls, which must be fs:0x1d0.
// See cgo/gcc_android_amd64.c for the derivation of the constant.
- SUBQ $0x1d0, DI // In android, the tls base
+ SUBQ $0x1d0, DI // In android, the tls base
#else
ADDQ $8, DI // ELF wants to use -8(FS)
#endif
NACL_SYSCALL(SYS_clock_gettime)
MOVL AX, ret+8(FP)
RET
-
+
TEXT runtime·nanotime(SB),NOSPLIT,$20
MOVL $0, 0(SP) // real time clock
LEAL 8(SP), AX
// save g
MOVL DI, 20(SP)
-
+
// g = m->gsignal
MOVL g_m(DI), BX
MOVL m_gsignal(BX), BX
MOVL BX, g(CX)
-
+
// copy arguments for sighandler
MOVL $11, 0(SP) // signal
MOVL $0, 4(SP) // siginfo
// Today those registers are just PC and SP, but in case additional registers
// are relevant in the future (for example DX is the Go func context register)
// we restore as many registers as possible.
- //
+ //
// We smash BP, because that's what the linker smashes during RET.
//
LEAL ctxt+4(FP), BP
// check that g exists
get_tls(CX)
MOVL g(CX), DI
-
+
CMPL DI, $0
JEQ nog
// save g
MOVL DI, 20(SP)
-
+
// g = m->gsignal
MOVL g_m(DI), BX
MOVL m_gsignal(BX), BX
RET
TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
-
+
// Set FS to point at m->tls.
LEAQ m_tls(R8), DI
CALL runtime·settls(SB)
CALL runtime·settls(SB)
POPL AX
POPAL
-
+
// Now segment is established. Initialize m, g.
get_tls(AX)
MOVL DX, g(AX)
INT $64
MOVL AX, ret+4(FP)
RET
-
+
TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
MOVL $38, AX
INT $64
SYSCALL
MOVL AX, ret+8(FP)
RET
-
+
TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
MOVQ $38, BP
SYSCALL
// Call a library function with SysV calling conventions.
// The called function can take a maximum of 6 INTEGER class arguments,
-// see
+// see
// Michael Matz, Jan Hubicka, Andreas Jaeger, and Mark Mitchell
-// System V Application Binary Interface
+// System V Application Binary Interface
// AMD64 Architecture Processor Supplement
// section 3.2.3.
//
MOVL 0(AX), AX
MOVQ AX, libcall_err(DI)
-skiperrno2:
+skiperrno2:
RET
// uint32 tstart_sysvicall(M *newm);
// Save m->libcall and m->scratch. We need to do this because we
// might get interrupted by a signal in runtime·asmcgocall.
- // save m->libcall
+ // save m->libcall
MOVQ g_m(R10), BP
LEAQ m_libcall(BP), R11
MOVQ libcall_fn(R11), R10
MOVL (_SYSTEM_TIME+time_hi2), DX
CMPL CX, DX
JNE wall
-
+
// w = DX:AX
// convert to Unix epoch (but still 100ns units)
#define delta 116444736000000000
SUBL $(delta & 0xFFFFFFFF), AX
SBBL $(delta >> 32), DX
-
+
// nano/100 = DX:AX
// split into two decimal halves by div 1e9.
// (decimal point is two spots over from correct place,
DIVL CX
MOVL AX, DI
MOVL DX, SI
-
+
// DI = nano/100/1e9 = nano/1e11 = sec/100, DX = SI = nano/100%1e9
// split DX into seconds and nanoseconds by div 1e7 magic multiply.
MOVL DX, AX
IMULL $10000000, DX
MOVL SI, CX
SUBL DX, CX
-
+
// DI = sec/100 (still)
// BX = (nano/100%1e9)/1e7 = (nano/1e9)%100 = sec%100
// CX = (nano/100%1e9)%1e7 = (nano%1e9)/100 = nsec/100
MOVQ $0, 32(SP) // overlapped
MOVQ runtime·_WriteFile(SB), AX
CALL AX
-
+
RET
// faster get/set last error
// func runtime·udiv(n, d uint32) (q, r uint32)
// compiler knows the register usage of this function
-// Reference:
+// Reference:
// Sloss, Andrew, et al.; ARM System Developer's Guide: Designing and Optimizing System Software
// Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
#define Rq R0 // input d, output q
MOVL DX, r2+20(FP)
MOVL $0, err+24(FP)
CALL runtime·exitsyscall(SB)
- RET
+ RET
MOVL DX, r2+20(FP)
MOVL $0, err+24(FP)
CALL runtime·exitsyscall(SB)
- RET
+ RET
MOVW $0, R2
MOVW R2, err+24(FP)
BL runtime·exitsyscall(SB)
- RET
+ RET
CALL runtime·gostring(SB)
LEAL str-144(SP), SI
JMP copyresult3
-
+
ok3:
CALL runtime·exitsyscall(SB)
LEAL ·emptystring(SB), SI
-
+
copyresult3:
LEAL err+24(FP), DI
MOVL $0, r2+32(FP)
CMPL AX, $-1
JNE ok4
-
+
LEAL errbuf-128(SP), AX
MOVL AX, sysargs-144(SP)
MOVL $128, sysargs1-140(SP)
CALL runtime·gostring(SB)
LEAL str-144(SP), SI
JMP copyresult4
-
+
ok4:
CALL runtime·exitsyscall(SB)
LEAL ·emptystring(SB), SI
-
+
copyresult4:
LEAL err+36(FP), DI
NO_LOCAL_POINTERS
LEAL newoffset+20(FP), AX
MOVL AX, placeholder+0(FP)
-
+
// copy args down
LEAL placeholder+0(FP), SI
LEAL sysargs-20(SP), DI
MOVSL
MOVL $SYS_SEEK, AX // syscall entry
INT $64
-
+
CMPL AX, $-1
JNE ok6
MOVL AX, newoffset_lo+20(FP)
MOVL AX, newoffset_hi+24(FP)
-
+
CALL syscall·errstr(SB)
MOVL SP, SI
JMP copyresult6
-
+
ok6:
LEAL ·emptystring(SB), SI
-
+
copyresult6:
LEAL err+28(FP), DI
CALL runtime·gostring(SB)
LEAQ str-160(SP), SI
JMP copyresult3
-
+
ok3:
CALL runtime·exitsyscall(SB)
LEAQ ·emptystring(SB), SI
-
+
copyresult3:
LEAQ err+48(FP), DI
MOVQ $0, r2+64(FP)
CMPL AX, $-1
JNE ok4
-
+
LEAQ errbuf-128(SP), AX
MOVQ AX, sysargs-160(SP)
MOVQ $128, sysargs1-152(SP)
CALL runtime·gostring(SB)
LEAQ str-160(SP), SI
JMP copyresult4
-
+
ok4:
CALL runtime·exitsyscall(SB)
LEAQ ·emptystring(SB), SI
-
+
copyresult4:
LEAQ err+72(FP), DI
NO_LOCAL_POINTERS
LEAQ newoffset+32(FP), AX
MOVQ AX, placeholder+0(FP)
-
+
// copy args down
LEAQ placeholder+0(FP), SI
LEAQ sysargs-40(SP), DI
MOVSQ
MOVQ $SYS_SEEK, BP // syscall entry
SYSCALL
-
+
CMPL AX, $-1
JNE ok6
MOVQ AX, newoffset+32(FP)
-
+
CALL syscall·errstr(SB)
MOVQ SP, SI
JMP copyresult6
-
+
ok6:
LEAQ ·emptystring(SB), SI
-
+
copyresult6:
LEAQ err+40(FP), DI