]> Cypherpunks repositories - gostls13.git/commitdiff
internal/runtime/atomic: add Xchg8 for arm
authorMauri de Souza Meneguzzo <mauri870@gmail.com>
Mon, 28 Oct 2024 23:10:31 +0000 (23:10 +0000)
committerCherry Mui <cherryyz@google.com>
Wed, 30 Oct 2024 18:09:49 +0000 (18:09 +0000)
For #69735

Change-Id: I18c0ca15d94a9b1751c1e55459283e01dc114150
GitHub-Last-Rev: dd9a39a5551e5a3415ab765cf271fecdbbe89b4c
GitHub-Pull-Request: golang/go#69924
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/620855
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
src/internal/runtime/atomic/atomic_arm.go
src/internal/runtime/atomic/atomic_arm.s
src/internal/runtime/atomic/xchg8_test.go

index 0909d224fcf38e66ba9d04b7fce0e9950f5315b8..dcc6ad99d6dea60498d3b46da5f2ceba0e943145 100644 (file)
@@ -74,6 +74,27 @@ func Xchg(addr *uint32, v uint32) uint32 {
        }
 }
 
+//go:noescape
+func Xchg8(addr *uint8, v uint8) uint8
+
+//go:nosplit
+func goXchg8(addr *uint8, v uint8) uint8 {
+       // Align down to 4 bytes and use 32-bit CAS.
+       addr32 := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(addr)) &^ 3))
+       shift := (uintptr(unsafe.Pointer(addr)) & 3) * 8 // little endian
+       word := uint32(v) << shift
+       mask := uint32(0xFF) << shift
+
+       for {
+               old := *addr32 // Read the old 32-bit value
+               // Clear the old 8 bits then insert the new value
+               if Cas(addr32, old, (old&^mask)|word) {
+                       // Return the old 8-bit value
+                       return uint8((old & mask) >> shift)
+               }
+       }
+}
+
 //go:nosplit
 func Xchguintptr(addr *uintptr, v uintptr) uintptr {
        return uintptr(Xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
index 93214da8267192f80b9dcb6e72d6bd4d2c9cada1..85cee049af9993dd6252f1cdf40d4a47c1f2ca1c 100644 (file)
@@ -264,6 +264,23 @@ or8loop:
 
        RET
 
+TEXT armXchg8<>(SB),NOSPLIT,$0-9
+       // addr is already in R1
+       MOVB    v+4(FP), R2
+xchg8loop:
+       LDREXB  (R1), R6
+
+       DMB     MB_ISHST
+
+       STREXB  R2, (R1), R0
+       CMP     $0, R0
+       BNE     xchg8loop
+
+       DMB     MB_ISH
+
+       MOVB R6, ret+8(FP)
+       RET
+
 // The following functions all panic if their address argument isn't
 // 8-byte aligned. Since we're calling back into Go code to do this,
 // we have to cooperate with stack unwinding. In the normal case, the
@@ -374,3 +391,17 @@ TEXT ·Or8(SB),NOSPLIT,$-4-5
        JMP     ·goOr8(SB)
 #endif
        JMP     armOr8<>(SB)
+
+TEXT ·Xchg8(SB),NOSPLIT,$-4-9
+       NO_LOCAL_POINTERS
+       MOVW    addr+0(FP), R1
+
+       // Uses STREXB/LDREXB that is armv6k or later.
+       // For simplicity we only enable this on armv7.
+#ifndef GOARM_7
+       MOVB    internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
+       CMP     $1, R11
+       BEQ     2(PC)
+       JMP     ·goXchg8(SB)
+#endif
+       JMP     armXchg8<>(SB)
index d9f1d8854ef68c9233a7e6a05702dd0ed6772f32..d9c0a8dd24362705a3fbf36c2b50d4acb1816c17 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build 386 || amd64 || arm64 || ppc64 || ppc64le
+//go:build 386 || amd64 || arm || arm64 || ppc64 || ppc64le
 
 package atomic_test
 
@@ -37,3 +37,23 @@ func TestXchg8(t *testing.T) {
                }
        }
 }
+
+func BenchmarkXchg8(b *testing.B) {
+       var x [512]uint8 // give byte its own cache line
+       sink = &x
+       for i := 0; i < b.N; i++ {
+               atomic.Xchg8(&x[255], uint8(i))
+       }
+}
+
+func BenchmarkXchg8Parallel(b *testing.B) {
+       var x [512]uint8 // give byte its own cache line
+       sink = &x
+       b.RunParallel(func(pb *testing.PB) {
+               i := uint8(0)
+               for pb.Next() {
+                       atomic.Xchg8(&x[255], i)
+                       i++
+               }
+       })
+}