From c5d424bfca065fe6c92e384df61d84cfade2bfed Mon Sep 17 00:00:00 2001 From: Mauri de Souza Meneguzzo Date: Wed, 23 Oct 2024 17:06:39 +0000 Subject: [PATCH] internal/runtime/atomic: add arm native implementations of And8/Or8 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7. Benchmark results for a raspberry Pi 3 model B+: goos: linux goarch: arm pkg: internal/runtime/atomic cpu: ARMv7 Processor rev 4 (v7l) │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ And8-4 127.65n ± 0% 68.74n ± 0% -46.15% (p=0.000 n=10) Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc GitHub-Last-Rev: a7351802fd212704712b37d183435ab14e58f885 GitHub-Pull-Request: golang/go#70002 Cq-Include-Trybots: luci.golang.try:gotip-linux-arm Reviewed-on: https://go-review.googlesource.com/c/go/+/622075 Reviewed-by: Keith Randall Reviewed-by: Cherry Mui Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/internal/runtime/atomic/atomic_arm.go | 10 +++- src/internal/runtime/atomic/atomic_arm.s | 64 +++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/internal/runtime/atomic/atomic_arm.go b/src/internal/runtime/atomic/atomic_arm.go index b58f643ca3..0909d224fc 100644 --- a/src/internal/runtime/atomic/atomic_arm.go +++ b/src/internal/runtime/atomic/atomic_arm.go @@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) { addrLock(addr).unlock() } +//go:noescape +func Or8(addr *uint8, v uint8) + //go:nosplit -func Or8(addr *uint8, v uint8) { +func goOr8(addr *uint8, v uint8) { // Align down to 4 bytes and use 32-bit CAS. uaddr := uintptr(unsafe.Pointer(addr)) addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3)) @@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) { } } +//go:noescape +func And8(addr *uint8, v uint8) + //go:nosplit -func And8(addr *uint8, v uint8) { +func goAnd8(addr *uint8, v uint8) { // Align down to 4 bytes and use 32-bit CAS. uaddr := uintptr(unsafe.Pointer(addr)) addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3)) diff --git a/src/internal/runtime/atomic/atomic_arm.s b/src/internal/runtime/atomic/atomic_arm.s index 1cf7d8f6ef..93214da826 100644 --- a/src/internal/runtime/atomic/atomic_arm.s +++ b/src/internal/runtime/atomic/atomic_arm.s @@ -228,6 +228,42 @@ store64loop: DMB MB_ISH RET +TEXT armAnd8<>(SB),NOSPLIT,$0-5 + // addr is already in R1 + MOVB v+4(FP), R2 + +and8loop: + LDREXB (R1), R6 + + DMB MB_ISHST + + AND R2, R6 + STREXB R6, (R1), R0 + CMP $0, R0 + BNE and8loop + + DMB MB_ISH + + RET + +TEXT armOr8<>(SB),NOSPLIT,$0-5 + // addr is already in R1 + MOVB v+4(FP), R2 + +or8loop: + LDREXB (R1), R6 + + DMB MB_ISHST + + ORR R2, R6 + STREXB R6, (R1), R0 + CMP $0, R0 + BNE or8loop + + DMB MB_ISH + + RET + // The following functions all panic if their address argument isn't // 8-byte aligned. Since we're calling back into Go code to do this, // we have to cooperate with stack unwinding. In the normal case, the @@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12 JMP ·goStore64(SB) #endif JMP armStore64<>(SB) + +TEXT ·And8(SB),NOSPLIT,$-4-5 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + +// Uses STREXB/LDREXB that is armv6k or later. +// For simplicity we only enable this on armv7. +#ifndef GOARM_7 + MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11 + CMP $1, R11 + BEQ 2(PC) + JMP ·goAnd8(SB) +#endif + JMP armAnd8<>(SB) + +TEXT ·Or8(SB),NOSPLIT,$-4-5 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + +// Uses STREXB/LDREXB that is armv6k or later. +// For simplicity we only enable this on armv7. +#ifndef GOARM_7 + MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11 + CMP $1, R11 + BEQ 2(PC) + JMP ·goOr8(SB) +#endif + JMP armOr8<>(SB) -- 2.48.1