crypto,internal/bytealg: fix assembly that clobbers BP

author    Keith Randall <khr@golang.org>  Tue, 11 Aug 2020 20:04:48 +0000 (13:04 -0700)
committer Keith Randall <khr@golang.org>  Sun, 16 Aug 2020 17:05:18 +0000 (17:05 +0000)

diff --git a/src/crypto/elliptic/p256_asm_amd64.s b/src/crypto/elliptic/p256_asm_amd64.s

index 7afa54a58ca153df25b28a17a794f6f11b0abc9a..c77b11bcf25c11a8179a2ee6ccacefe5ec5b695b 100644 (file)
--- a/src/crypto/elliptic/p256_asm_amd64.s
+++ b/src/crypto/elliptic/p256_asm_amd64.s
@@ -1336,7 +1336,7 @@ TEXT p256SubInternal(SB),NOSPLIT,$0
  
         RET
  /* ---------------------------------------*/
-TEXT p256MulInternal(SB),NOSPLIT,$0
+TEXT p256MulInternal(SB),NOSPLIT,$8
         MOVQ acc4, mul0
         MULQ t0
         MOVQ mul0, acc0
@@ -1519,7 +1519,7 @@ TEXT p256MulInternal(SB),NOSPLIT,$0
  
         RET
  /* ---------------------------------------*/
-TEXT p256SqrInternal(SB),NOSPLIT,$0
+TEXT p256SqrInternal(SB),NOSPLIT,$8
  
         MOVQ acc4, mul0
         MULQ acc5
@@ -2345,4 +2345,3 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48
  
         RET
  /* ---------------------------------------*/
-
diff --git a/src/crypto/md5/md5block_amd64.s b/src/crypto/md5/md5block_amd64.s

index 90d932b14645b941b372c5756388d7cfc7b5c274..7c7d92d7e806dd399a0fd47be32aee1d690ed36e 100644 (file)
--- a/src/crypto/md5/md5block_amd64.s
+++ b/src/crypto/md5/md5block_amd64.s
@@ -13,7 +13,7 @@
  // Licence: I hereby disclaim the copyright on this code and place it
  // in the public domain.
  
-TEXT   ·block(SB),NOSPLIT,$0-32
+TEXT   ·block(SB),NOSPLIT,$8-32
         MOVQ    dig+0(FP),      BP
         MOVQ    p+8(FP),        SI
         MOVQ    p_len+16(FP), DX
diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s

index 4459820801082efa47ed2d8e6b73c99c2962899c..6193b572393a5829221682784fba697728cbdbc7 100644 (file)
--- a/src/internal/bytealg/index_amd64.s
+++ b/src/internal/bytealg/index_amd64.s
@@ -8,7 +8,7 @@
  TEXT ·Index(SB),NOSPLIT,$0-56
         MOVQ a_base+0(FP), DI
         MOVQ a_len+8(FP), DX
-       MOVQ b_base+24(FP), BP
+       MOVQ b_base+24(FP), R8
         MOVQ b_len+32(FP), AX
         MOVQ DI, R10
         LEAQ ret+48(FP), R11
@@ -17,7 +17,7 @@ TEXT ·Index(SB),NOSPLIT,$0-56
  TEXT ·IndexString(SB),NOSPLIT,$0-40
         MOVQ a_base+0(FP), DI
         MOVQ a_len+8(FP), DX
-       MOVQ b_base+16(FP), BP
+       MOVQ b_base+16(FP), R8
         MOVQ b_len+24(FP), AX
         MOVQ DI, R10
         LEAQ ret+32(FP), R11
@@ -26,7 +26,7 @@ TEXT ·IndexString(SB),NOSPLIT,$0-40
  // AX: length of string, that we are searching for
  // DX: length of string, in which we are searching
  // DI: pointer to string, in which we are searching
-// BP: pointer to string, that we are searching for
+// R8: pointer to string, that we are searching for
  // R11: address, where to put return value
  // Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
  TEXT indexbody<>(SB),NOSPLIT,$0
@@ -37,11 +37,11 @@ TEXT indexbody<>(SB),NOSPLIT,$0
  no_sse42:
         CMPQ AX, $2
         JA   _3_or_more
-       MOVW (BP), BP
+       MOVW (R8), R8
         LEAQ -1(DI)(DX*1), DX
  loop2:
         MOVW (DI), SI
-       CMPW SI,BP
+       CMPW SI,R8
         JZ success
         ADDQ $1,DI
         CMPQ DI,DX
@@ -50,12 +50,12 @@ loop2:
  _3_or_more:
         CMPQ AX, $3
         JA   _4_or_more
-       MOVW 1(BP), BX
-       MOVW (BP), BP
+       MOVW 1(R8), BX
+       MOVW (R8), R8
         LEAQ -2(DI)(DX*1), DX
  loop3:
         MOVW (DI), SI
-       CMPW SI,BP
+       CMPW SI,R8
         JZ   partial_success3
         ADDQ $1,DI
         CMPQ DI,DX
@@ -72,11 +72,11 @@ partial_success3:
  _4_or_more:
         CMPQ AX, $4
         JA   _5_or_more
-       MOVL (BP), BP
+       MOVL (R8), R8
         LEAQ -3(DI)(DX*1), DX
  loop4:
         MOVL (DI), SI
-       CMPL SI,BP
+       CMPL SI,R8
         JZ   success
         ADDQ $1,DI
         CMPQ DI,DX
@@ -87,11 +87,11 @@ _5_or_more:
         JA   _8_or_more
         LEAQ 1(DI)(DX*1), DX
         SUBQ AX, DX
-       MOVL -4(BP)(AX*1), BX
-       MOVL (BP), BP
+       MOVL -4(R8)(AX*1), BX
+       MOVL (R8), R8
  loop5to7:
         MOVL (DI), SI
-       CMPL SI,BP
+       CMPL SI,R8
         JZ   partial_success5to7
         ADDQ $1,DI
         CMPQ DI,DX
@@ -108,11 +108,11 @@ partial_success5to7:
  _8_or_more:
         CMPQ AX, $8
         JA   _9_or_more
-       MOVQ (BP), BP
+       MOVQ (R8), R8
         LEAQ -7(DI)(DX*1), DX
  loop8:
         MOVQ (DI), SI
-       CMPQ SI,BP
+       CMPQ SI,R8
         JZ   success
         ADDQ $1,DI
         CMPQ DI,DX
@@ -123,11 +123,11 @@ _9_or_more:
         JA   _16_or_more
         LEAQ 1(DI)(DX*1), DX
         SUBQ AX, DX
-       MOVQ -8(BP)(AX*1), BX
-       MOVQ (BP), BP
+       MOVQ -8(R8)(AX*1), BX
+       MOVQ (R8), R8
  loop9to15:
         MOVQ (DI), SI
-       CMPQ SI,BP
+       CMPQ SI,R8
         JZ   partial_success9to15
         ADDQ $1,DI
         CMPQ DI,DX
@@ -144,7 +144,7 @@ partial_success9to15:
  _16_or_more:
         CMPQ AX, $16
         JA   _17_or_more
-       MOVOU (BP), X1
+       MOVOU (R8), X1
         LEAQ -15(DI)(DX*1), DX
  loop16:
         MOVOU (DI), X2
@@ -161,8 +161,8 @@ _17_or_more:
         JA   _32_or_more
         LEAQ 1(DI)(DX*1), DX
         SUBQ AX, DX
-       MOVOU -16(BP)(AX*1), X0
-       MOVOU (BP), X1
+       MOVOU -16(R8)(AX*1), X0
+       MOVOU (R8), X1
  loop17to31:
         MOVOU (DI), X2
         PCMPEQB X1,X2
@@ -188,7 +188,7 @@ partial_success17to31:
  _32_or_more:
         CMPQ AX, $32
         JA   _33_to_63
-       VMOVDQU (BP), Y1
+       VMOVDQU (R8), Y1
         LEAQ -31(DI)(DX*1), DX
  loop32:
         VMOVDQU (DI), Y2
@@ -203,8 +203,8 @@ loop32:
  _33_to_63:
         LEAQ 1(DI)(DX*1), DX
         SUBQ AX, DX
-       VMOVDQU -32(BP)(AX*1), Y0
-       VMOVDQU (BP), Y1
+       VMOVDQU -32(R8)(AX*1), Y0
+       VMOVDQU (R8), Y1
  loop33to63:
         VMOVDQU (DI), Y2
         VPCMPEQB Y1, Y2, Y3
@@ -241,10 +241,10 @@ sse42:
         // This value was determined experimentally and is the ~same
         // on Nehalem (first with SSE42) and Haswell.
         JAE _9_or_more
-       LEAQ 16(BP), SI
+       LEAQ 16(R8), SI
         TESTW $0xff0, SI
         JEQ no_sse42
-       MOVOU (BP), X1
+       MOVOU (R8), X1
         LEAQ -15(DI)(DX*1), SI
         MOVQ $16, R9
         SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s

index b60057ce8319e023df3e46612d1f124f9b1961b3..621c01b365b9a315676fd034fd156f74b456e534 100644 (file)
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -212,7 +212,7 @@ TEXT runtime·walltime1(SB),NOSPLIT,$16-12
         // due to stack probes inserted to avoid stack/heap collisions.
         // See issue #20427.
  
-       MOVQ    SP, BP  // Save old SP; BP unchanged by C code.
+       MOVQ    SP, R12 // Save old SP; R12 unchanged by C code.
  
         get_tls(CX)
         MOVQ    g(CX), AX
@@ -250,7 +250,7 @@ noswitch:
         MOVQ    0(SP), AX       // sec
         MOVQ    8(SP), DX       // nsec
  ret:
-       MOVQ    BP, SP          // Restore real SP
+       MOVQ    R12, SP         // Restore real SP
         // Restore vdsoPC, vdsoSP
         // We don't worry about being signaled between the two stores.
         // If we are not in a signal handler, we'll restore vdsoSP to 0,
@@ -277,7 +277,7 @@ fallback:
  TEXT runtime·nanotime1(SB),NOSPLIT,$16-8
         // Switch to g0 stack. See comment above in runtime·walltime.
  
-       MOVQ    SP, BP  // Save old SP; BP unchanged by C code.
+       MOVQ    SP, R12 // Save old SP; R12 unchanged by C code.
  
         get_tls(CX)
         MOVQ    g(CX), AX
@@ -315,7 +315,7 @@ noswitch:
         MOVQ    0(SP), AX       // sec
         MOVQ    8(SP), DX       // nsec
  ret:
-       MOVQ    BP, SP          // Restore real SP
+       MOVQ    R12, SP         // Restore real SP
         // Restore vdsoPC, vdsoSP
         // We don't worry about being signaled between the two stores.
         // If we are not in a signal handler, we'll restore vdsoSP to 0,
author	Keith Randall <khr@golang.org>
	Tue, 11 Aug 2020 20:04:48 +0000 (13:04 -0700)
committer	Keith Randall <khr@golang.org>
	Sun, 16 Aug 2020 17:05:18 +0000 (17:05 +0000)
src/crypto/elliptic/p256_asm_amd64.s		patch | blob | history
src/crypto/md5/md5block_amd64.s		patch | blob | history
src/internal/bytealg/index_amd64.s		patch | blob | history
src/runtime/sys_linux_amd64.s		patch | blob | history