MOVB val+4(FP), AX
XCHGB AX, 0(BX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
LOCK
ANDB BX, (AX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
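
On both x86 ports the entire read-modify-write happens in a single LOCK-prefixed instruction. For reference, the contract these functions provide (atomically *addr |= v or *addr &= v, with no return value) can be sketched in portable Go; the generic ARM fallback later in this change uses exactly this CAS-loop shape with the runtime's own Cas, while this sketch uses the public sync/atomic package:

package main

import (
	"fmt"
	"sync/atomic"
)

// orUint32 sketches the contract of the new Or: atomically
// *addr |= v, returning nothing. The assembly above does this in
// one LOCK ORL; a CAS loop is the portable equivalent, and And is
// identical with &= in place of |=.
func orUint32(addr *uint32, v uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old|v) {
			return
		}
	}
}

func main() {
	x := uint32(0b0001)
	orUint32(&x, 0b0100)
	fmt.Printf("%#b\n", x) // 0b101
}
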
BEQ R4, -4(PC)
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3 // load-linked: R3 = *(R1)
+ OR R2, R3
+ SC R3, (R1) // store-conditional: R3 = 1 on success, 0 on failure
+ BEQ R3, -4(PC) // retry if the store-conditional failed
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
BEQ R4, try_and8
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3 // load-linked: R3 = *(R1)
+ OR R2, R3
+ SC R3, (R1) // store-conditional: R3 = 1 on success, 0 on failure
+ BEQ R3, -4(PC) // retry if the store-conditional failed
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
MOVBZ val+8(FP), R4
LWSYNC
again:
- LBAR (R3),R6
- AND R4,R6
- STBCCC R6,(R3)
+ LBAR (R3), R6
+ AND R4, R6
+ STBCCC R6, (R3)
+ BNE again
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ OR R4, R6
+ STWCCC R6, (R3)
+ BNE again
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ AND R4, R6
+ STWCCC R6, (R3)
BNE again
RET
// func Or8(addr *uint8, v uint8)
TEXT ·Or8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to OR with the entire word atomically.
// func And8(addr *uint8, v uint8)
TEXT ·And8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to AND with the entire word atomically.
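
The Or8 and And8 comments above describe the same word-and-mask trick. A sketch of the address arithmetic in Go (helper name invented, little-endian lane order assumed, and a CAS loop standing in for the native atomic word update):

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// or8ViaWord: align the byte's address down to a 4-byte boundary,
// place v in that byte's lane of a 32-bit mask, then OR the whole
// word atomically. Lane order shown is little-endian; a big-endian
// target derives the shift from the opposite end of the word.
func or8ViaWord(addr *uint8, v uint8) {
	// Single-expression conversion keeps the pointer arithmetic
	// valid under the unsafe.Pointer rules.
	word := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(addr)) &^ 3))
	shift := (uintptr(unsafe.Pointer(addr)) & 3) * 8 // bit offset of the byte's lane
	mask := uint32(v) << shift
	for {
		old := atomic.LoadUint32(word)
		if atomic.CompareAndSwapUint32(word, old, old|mask) {
			return
		}
	}
}

func main() {
	var w uint32 // a 4-byte aligned word to poke a byte of
	b := (*[4]uint8)(unsafe.Pointer(&w))
	or8ViaWord(&b[1], 0x80)
	fmt.Printf("%#x\n", w) // 0x8000 on little-endian
}

And8 differs only in the mask: the three untouched lanes are filled with ones, mask = uint32(v)<<shift | ^(uint32(0xff)<<shift), so the AND passes them through unchanged.
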
RLL R5, R4, R4 // R4 = rotl(R4, R5)
LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
+ RET
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
}
}
+//go:nosplit
+func Or(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old|v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old&v) {
+ return
+ }
+ }
+}
+
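
This CAS loop is the shape every port without a native read-modify-write instruction reduces to. Note also why Or and And are safe to expose where XOR is not: both are idempotent (x|m|m == x|m and x&m&m == x&m), the property the "Do not add atomicxor8" NOTEs in the declaration files depend on. A hypothetical usage sketch, with flag names invented for illustration and Go 1.23's public sync/atomic.Or32/And32 standing in for the runtime-internal functions:

package main

import (
	"fmt"
	"sync/atomic"
)

// Hypothetical flag bits, invented for illustration.
const (
	flagMark uint32 = 1 << iota
	flagScan
)

func main() {
	var flags uint32
	atomic.Or32(&flags, flagMark)   // set flagMark
	atomic.Or32(&flags, flagScan)   // set flagScan
	atomic.And32(&flags, ^flagMark) // clear flagMark
	fmt.Printf("%#x\n", flags)      // 0x2: only flagScan remains
}
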
//go:nosplit
func armcas(ptr *uint32, old, new uint32) bool
//go:noescape
func And8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
CBNZ R3, -3(PC)
RET
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2 // load-acquire exclusive: R2 = *(R0)
+ AND R1, R2
+ STLXRW R2, (R0), R3 // store-release exclusive; R3 = 0 on success
+ CBNZ R3, -3(PC) // retry if the store-exclusive failed
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2
+ ORR R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, -3(PC)
+ RET
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Store(ptr *uint32, val uint32)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
func And8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
SLL A2, A1
AMOORW A1, (A0), ZERO
RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOANDW A1, (A0), ZERO // atomic: *(A0) &= A1; old value discarded in ZERO
+ RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOORW A1, (A0), ZERO // atomic: *(A0) |= A1; old value discarded in ZERO
+ RET
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
}
}
+func TestAnd(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0xffffffff)
+ for i := uint32(0); i < 32; i++ {
+ atomic.And(&x, ^(1 << i))
+ if r := uint32(0xffffffff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint32, 1<<12)
+ for i := range a {
+ a[i] = 0xffffffff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := ^uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
func TestOr8(t *testing.T) {
// Basic sanity check.
x := uint8(0)
}
}
-func TestBitwiseContended(t *testing.T) {
+func TestOr(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0)
+ for i := uint32(0); i < 32; i++ {
+ atomic.Or(&x, 1<<i)
+ if r := (uint32(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint32(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xffffffff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint32(0xffffffff), v)
+ }
+ }
+}
+
+func TestBitwiseContended8(t *testing.T) {
// Start with every bit in array set to 0.
a := make([]uint8, 16)
}
}
+func TestBitwiseContended(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ if atomic.Load(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And(&a[i], ^m)
+ if atomic.Load(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
func TestStorepNoWB(t *testing.T) {
var p [2]*int
for i := range p {
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
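+// js/wasm has no threads, so a plain, non-atomic read-modify-write is sufficient here.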
+//go:nosplit
+//go:noinline
+func And(ptr *uint32, val uint32) {
+ *ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or(ptr *uint32, val uint32) {
+ *ptr = *ptr | val
+}
+
//go:nosplit
//go:noinline
func Cas64(ptr *uint64, old, new uint64) bool {
}
}
+func BenchmarkAnd(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And(&x[63], uint32(i))
+ }
+}
+
func BenchmarkAnd8Parallel(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
})
}
+func BenchmarkAndParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.And(&x[63], i)
+ i++
+ }
+ })
+}
+
func BenchmarkOr8(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
}
}
+func BenchmarkOr(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or(&x[63], uint32(i))
+ }
+}
+
func BenchmarkOr8Parallel(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
})
}
+func BenchmarkOrParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.Or(&x[63], i)
+ i++
+ }
+ })
+}
+
func BenchmarkXadd(b *testing.B) {
var x uint32
ptr := &x