CMP R0, R2
BNE casfail
- MOVB runtime·goarm(SB), R11
- CMP $7, R11
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
BLT 2(PC)
DMB MB_ISHST
BNE casl
MOVW $1, R0
- MOVB runtime·goarm(SB), R11
- CMP $7, R11
+ CMP $7, R8
BLT 2(PC)
DMB MB_ISH
MOVB R0, ret+12(FP)
RET
+// stubs: these wrappers just branch to the shared implementations.
+
TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
B runtime∕internal∕atomic·Load(SB)
TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
B runtime∕internal∕atomic·Cas(SB)
+TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
+ B runtime∕internal∕atomic·Cas(SB)
+
TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
B runtime∕internal∕atomic·Load(SB)
TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
B runtime∕internal∕atomic·Store(SB)
+TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
+ B runtime∕internal∕atomic·Store(SB)
+
TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
B runtime∕internal∕atomic·Xadd(SB)
TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
B runtime∕internal∕atomic·Xadd64(SB)
+
+// 64-bit atomics
+// The native ARM implementations use LDREXD/STREXD, which are
+// available on ARMv6k or later. We use them only on ARMv7.
+// On older ARM, we use Go implementations which simulate 64-bit
+// atomics with locks.
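
(Aside: "simulate 64-bit atomics with locks" means, roughly, the Go
sketch below. The real fallback, goCas64 further down, hashes the
address into a runtime-internal spinlock table via addrLock; a plain
sync.Mutex stands in for that table here.)

    package atomicsketch

    import "sync"

    // mu stands in for the runtime's per-address spinlock table.
    var mu sync.Mutex

    // cas64WithLock shows the shape of the lock-based fallback:
    // take the lock, compare, conditionally swap, release.
    func cas64WithLock(addr *uint64, old, new uint64) bool {
        mu.Lock()
        defer mu.Unlock()
        if *addr == old {
            *addr = new
            return true
        }
        return false
    }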
+
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 2(PC)
+ MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+ MOVW old_lo+4(FP), R2
+ MOVW old_hi+8(FP), R3
+ MOVW new_lo+12(FP), R4
+ MOVW new_hi+16(FP), R5
+cas64loop:
+ LDREXD (R1), R6 // loads R6 and R7
+ CMP R2, R6
+ BNE cas64fail
+ CMP R3, R7
+ BNE cas64fail
+
+ DMB MB_ISHST
+
+ STREXD R4, (R1), R0 // stores R4 and R5
+ CMP $0, R0
+ BNE cas64loop
+ MOVW $1, R0
+
+ DMB MB_ISH
+
+ MOVBU R0, swapped+20(FP)
+ RET
+cas64fail:
+ MOVW $0, R0
+ MOVBU R0, swapped+20(FP)
+ RET
+
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 2(PC)
+ MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+ MOVW delta_lo+4(FP), R2
+ MOVW delta_hi+8(FP), R3
+
+add64loop:
+ LDREXD (R1), R4 // loads R4 and R5
+ ADD.S R2, R4
+ ADC R3, R5
+
+ DMB MB_ISHST
+
+ STREXD R4, (R1), R0 // stores R4 and R5
+ CMP $0, R0
+ BNE add64loop
+
+ DMB MB_ISH
+
+ MOVW R4, new_lo+12(FP)
+ MOVW R5, new_hi+16(FP)
+ RET
+
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 2(PC)
+ MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+ MOVW new_lo+4(FP), R2
+ MOVW new_hi+8(FP), R3
+
+swap64loop:
+ LDREXD (R1), R4 // loads R4 and R5
+
+ DMB MB_ISHST
+
+ STREXD R2, (R1), R0 // stores R2 and R3
+ CMP $0, R0
+ BNE swap64loop
+
+ DMB MB_ISH
+
+ MOVW R4, old_lo+12(FP)
+ MOVW R5, old_hi+16(FP)
+ RET
+
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 2(PC)
+ MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+
+ LDREXD (R1), R2 // loads R2 and R3
+ DMB MB_ISH
+
+ MOVW R2, val_lo+4(FP)
+ MOVW R3, val_hi+8(FP)
+ RET
+
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 2(PC)
+ MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+ MOVW val_lo+4(FP), R2
+ MOVW val_hi+8(FP), R3
+
+store64loop:
+ LDREXD (R1), R4 // loads R4 and R5 (discarded); the load only arms the exclusive monitor for STREXD
+
+ DMB MB_ISHST
+
+ STREXD R2, (R1), R0 // stores R2 and R3
+ CMP $0, R0
+ BNE store64loop
+
+ DMB MB_ISH
+ RET
+
+TEXT ·Cas64(SB),NOSPLIT,$0-21
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armCas64<>(SB)
+ JMP ·goCas64(SB)
+
+TEXT ·Xadd64(SB),NOSPLIT,$0-20
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armXadd64<>(SB)
+ JMP ·goXadd64(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armXchg64<>(SB)
+ JMP ·goXchg64(SB)
+
+TEXT ·Load64(SB),NOSPLIT,$0-12
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armLoad64<>(SB)
+ JMP ·goLoad64(SB)
+
+TEXT ·Store64(SB),NOSPLIT,$0-12
+ MOVB runtime·goarm(SB), R11
+ CMP $7, R11
+ BLT 2(PC)
+ JMP armStore64<>(SB)
+ JMP ·goStore64(SB)
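
(In Go terms, each dispatch stub above is equivalent to the sketch
below; goarm, armCas64, and goCas64 are stand-ins for the assembly
symbols, with both implementations stubbed out:)

    package atomicsketch

    var goarm uint8 // stand-in for the runtime·goarm byte the stubs read

    func armCas64(addr *uint64, old, new uint64) bool { panic("native") } // LDREXD/STREXD path
    func goCas64(addr *uint64, old, new uint64) bool  { panic("locked") } // lock-based path

    // Cas64 mirrors the BLT-over-a-JMP pattern above: ARMv7 and later
    // take the native path, older cores fall back to the Go version.
    func Cas64(addr *uint64, old, new uint64) bool {
        if goarm >= 7 {
            return armCas64(addr, old, new)
        }
        return goCas64(addr, old, new)
    }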
return uintptr(Xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
}
-//go:nosplit
-func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer) {
- for {
- old := *(*unsafe.Pointer)(addr)
- if Casp1((*unsafe.Pointer)(addr), old, v) {
- return
- }
- }
-}
+// Not noescape -- it installs a pointer to addr.
+func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer)
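
(Why StorepNoWB cannot be noescape: it publishes v through addr, so
whatever v points to must outlive the caller's frame. A sketch,
assuming code in this package; publish and slot are hypothetical:)

    // If v were wrongly annotated go:noescape, escape analysis could
    // stack-allocate x, and the pointer published through slot would
    // dangle once publish returns. Treating v as escaping forces x to
    // the heap.
    func publish(slot *unsafe.Pointer) {
        x := new(int) // must be heap-allocated: it is published below
        StorepNoWB(unsafe.Pointer(slot), unsafe.Pointer(x))
    }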
-//go:nosplit
-func Store(addr *uint32, v uint32) {
- for {
- old := *addr
- if Cas(addr, old, v) {
- return
- }
- }
-}
+//go:noescape
+func Store(addr *uint32, v uint32)
//go:nosplit
-func Cas64(addr *uint64, old, new uint64) bool {
+func goCas64(addr *uint64, old, new uint64) bool {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
*(*int)(nil) = 0 // crash on unaligned uint64
}
}
//go:nosplit
-func Xadd64(addr *uint64, delta int64) uint64 {
+func goXadd64(addr *uint64, delta int64) uint64 {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
*(*int)(nil) = 0 // crash on unaligned uint64
}
}
//go:nosplit
-func Xchg64(addr *uint64, v uint64) uint64 {
+func goXchg64(addr *uint64, v uint64) uint64 {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
*(*int)(nil) = 0 // crash on unaligned uint64
}
}
//go:nosplit
-func Load64(addr *uint64) uint64 {
+func goLoad64(addr *uint64) uint64 {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
*(*int)(nil) = 0 // crash on unaligned uint64
}
}
//go:nosplit
-func Store64(addr *uint64, v uint64) {
+func goStore64(addr *uint64, v uint64) {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
*(*int)(nil) = 0 // crash on unaligned uint64
}
//go:noescape
func Loadp(addr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func Cas64(addr *uint64, old, new uint64) bool
+
+//go:noescape
+func Xadd64(addr *uint64, delta int64) uint64
+
+//go:noescape
+func Xchg64(addr *uint64, v uint64) uint64
+
+//go:noescape
+func Load64(addr *uint64) uint64
+
+//go:noescape
+func Store64(addr *uint64, v uint64)
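
(A usage sketch with a hypothetical caller: a CAS-based increment
built from these declarations. Xadd64 does this in one call; the loop
only illustrates the Load64/Cas64 contract:)

    // inc64 bumps *addr by one, retrying until no other writer
    // intervenes between the load and the compare-and-swap.
    func inc64(addr *uint64) {
        for {
            old := Load64(addr)
            if Cas64(addr, old, old+1) {
                return
            }
        }
    }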
MOVB R0, ret+12(FP)
RET
-TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0
- B runtime∕internal∕atomic·Cas(SB)
-
// As for cas, memory barriers are complicated on ARM, but the kernel
// provides a user helper. ARMv5 does not support SMP and has no
// memory barrier instruction at all. ARMv6 added SMP support and has
// a memory barrier, but it requires writing to a coprocessor
// register. ARMv7 introduced the DMB instruction, but it's expensive
// even on single-core devices. The kernel helper takes care of all of
// this for us.
TEXT memory_barrier<>(SB),NOSPLIT|NOFRAME,$0
MOVW $0xffff0fa0, R15 // R15 is hardware PC.
-TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT,$0-8
+TEXT ·Load(SB),NOSPLIT,$0-8
MOVW addr+0(FP), R0
MOVW (R0), R1
CMP $7, R11
BGE native_barrier
BL memory_barrier<>(SB)
- B prolog
+ B end
native_barrier:
DMB MB_ISH
-
-prolog:
+end:
MOVW R1, ret+4(FP)
RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+ MOVW addr+0(FP), R1
+ MOVW v+4(FP), R2
+
+ MOVB runtime·goarm(SB), R8
+ CMP $7, R8
+ BGE native_barrier
+ BL memory_barrier<>(SB)
+ B store
+native_barrier:
+ DMB MB_ISH
+
+store:
+ MOVW R2, (R1)
+
+ CMP $7, R8
+ BGE native_barrier2
+ BL memory_barrier<>(SB)
+ RET
+native_barrier2:
+ DMB MB_ISH
+ RET
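
(Design note: the barrier on both sides of the plain MOVW store is
what makes ·Store sequentially consistent: the first barrier orders
earlier accesses before the store becomes visible, and the second
keeps later accesses from being reordered ahead of it.)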