]> Cypherpunks repositories - gostls13.git/commitdiff
internal/runtime/atomic: add arm native implementations of And8/Or8
authorMauri de Souza Meneguzzo <mauri870@gmail.com>
Wed, 23 Oct 2024 17:06:39 +0000 (17:06 +0000)
committerCherry Mui <cherryyz@google.com>
Mon, 28 Oct 2024 13:37:30 +0000 (13:37 +0000)
With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7.

Benchmark results for a raspberry Pi 3 model B+:

goos: linux
goarch: arm
pkg: internal/runtime/atomic
cpu: ARMv7 Processor rev 4 (v7l)
 │   old.txt    │               new.txt               │
 │    sec/op    │   sec/op     vs base                │
And8-4             127.65n ± 0%   68.74n ± 0%  -46.15% (p=0.000 n=10)

Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc
GitHub-Last-Rev: a7351802fd212704712b37d183435ab14e58f885
GitHub-Pull-Request: golang/go#70002
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/622075
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/internal/runtime/atomic/atomic_arm.go
src/internal/runtime/atomic/atomic_arm.s

index b58f643ca34c1ce34b81a89a8718211c39bf0aa0..0909d224fcf38e66ba9d04b7fce0e9950f5315b8 100644 (file)
@@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
        addrLock(addr).unlock()
 }
 
+//go:noescape
+func Or8(addr *uint8, v uint8)
+
 //go:nosplit
-func Or8(addr *uint8, v uint8) {
+func goOr8(addr *uint8, v uint8) {
        // Align down to 4 bytes and use 32-bit CAS.
        uaddr := uintptr(unsafe.Pointer(addr))
        addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
@@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
        }
 }
 
+//go:noescape
+func And8(addr *uint8, v uint8)
+
 //go:nosplit
-func And8(addr *uint8, v uint8) {
+func goAnd8(addr *uint8, v uint8) {
        // Align down to 4 bytes and use 32-bit CAS.
        uaddr := uintptr(unsafe.Pointer(addr))
        addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
index 1cf7d8f6ef3560ed41a39d333594399befe57402..93214da8267192f80b9dcb6e72d6bd4d2c9cada1 100644 (file)
@@ -228,6 +228,42 @@ store64loop:
        DMB     MB_ISH
        RET
 
+TEXT armAnd8<>(SB),NOSPLIT,$0-5
+       // addr is already in R1
+       MOVB    v+4(FP), R2
+
+and8loop:
+       LDREXB  (R1), R6
+
+       DMB     MB_ISHST
+
+       AND     R2, R6
+       STREXB  R6, (R1), R0
+       CMP     $0, R0
+       BNE     and8loop
+
+       DMB     MB_ISH
+
+       RET
+
+TEXT armOr8<>(SB),NOSPLIT,$0-5
+       // addr is already in R1
+       MOVB    v+4(FP), R2
+
+or8loop:
+       LDREXB  (R1), R6
+
+       DMB     MB_ISHST
+
+       ORR     R2, R6
+       STREXB  R6, (R1), R0
+       CMP     $0, R0
+       BNE     or8loop
+
+       DMB     MB_ISH
+
+       RET
+
 // The following functions all panic if their address argument isn't
 // 8-byte aligned. Since we're calling back into Go code to do this,
 // we have to cooperate with stack unwinding. In the normal case, the
@@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
        JMP     ·goStore64(SB)
 #endif
        JMP     armStore64<>(SB)
+
+TEXT ·And8(SB),NOSPLIT,$-4-5
+       NO_LOCAL_POINTERS
+       MOVW    addr+0(FP), R1
+
+// Uses STREXB/LDREXB that is armv6k or later.
+// For simplicity we only enable this on armv7.
+#ifndef GOARM_7
+       MOVB    internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
+       CMP     $1, R11
+       BEQ     2(PC)
+       JMP     ·goAnd8(SB)
+#endif
+       JMP     armAnd8<>(SB)
+
+TEXT ·Or8(SB),NOSPLIT,$-4-5
+       NO_LOCAL_POINTERS
+       MOVW    addr+0(FP), R1
+
+// Uses STREXB/LDREXB that is armv6k or later.
+// For simplicity we only enable this on armv7.
+#ifndef GOARM_7
+       MOVB    internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
+       CMP     $1, R11
+       BEQ     2(PC)
+       JMP     ·goOr8(SB)
+#endif
+       JMP     armOr8<>(SB)