s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
},
- sys.PPC64)
+ sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Loadp",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.PPC64)
+ sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Xchg",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
bne := s.Prog(s390x.ABNE)
bne.To.Type = obj.TYPE_BRANCH
gc.Patch(bne, cs)
+ case ssa.OpS390XSYNC:
+ s.Prog(s390x.ASYNC)
case ssa.OpClobber:
// TODO: implement for clobberdead experiment. Nop is ok for now.
default:
(RoundToEven x) -> (FIDBR [4] x)
(Round x) -> (FIDBR [1] x)
-// Atomic loads.
-(AtomicLoad8 ptr mem) -> (MOVBZatomicload ptr mem)
-(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
-(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
-(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
-
-// Atomic stores.
-(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
-(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
-(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
+// Atomic loads and stores.
+// The SYNC instruction (fast-BCR-serialization) prevents store-load
+// reordering. Other sequences of memory operations (load-load,
+// store-store and load-store) are already guaranteed not to be reordered.
+(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
+(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))
+
+// Store-release doesn't require store-load ordering.
+(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)
// Atomic adds.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
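
The comment above carries the key reasoning: z/Architecture only makes the store-load order observable out of program order, so sequentially consistent stores need a serializing instruction (fast-BCR-serialization, a BCR 14,0 that does not branch but serializes), while the other orders come for free. As an illustration only (not part of this change), the classic store-buffering litmus test below shows the reordering the SYNC forbids. It uses sync/atomic, which on s390x should lower through the AtomicStore*/AtomicLoad* ops rewritten here; with the SYNC emitted after each store, the outcome r1 == 0 && r2 == 0 must never be observed.

// Store-buffering (SB) litmus sketch, illustrative only. With sequentially
// consistent atomic stores (MOV*atomicstore followed by SYNC under the rules
// above), the outcome r1 == 0 && r2 == 0 is forbidden. Plain stores on a CPU
// that reorders a store with a later load could produce it.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	for i := 0; i < 100000; i++ {
		var x, y int32
		var r1, r2 int32
		var wg sync.WaitGroup
		wg.Add(2)
		go func() {
			defer wg.Done()
			atomic.StoreInt32(&x, 1)  // store one flag ...
			r1 = atomic.LoadInt32(&y) // ... then load the other
		}()
		go func() {
			defer wg.Done()
			atomic.StoreInt32(&y, 1)
			r2 = atomic.LoadInt32(&x)
		}()
		wg.Wait()
		if r1 == 0 && r2 == 0 {
			fmt.Println("store-load reordering observed at iteration", i)
			return
		}
	}
	fmt.Println("forbidden outcome never observed")
}
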
fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
+ sync = regInfo{inputs: []regMask{0}}
+
// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
{name: "FlagGT"}, // CC=2 (greater than)
{name: "FlagOV"}, // CC=3 (overflow)
+ // Fast-BCR-serialization to ensure store-load ordering.
+ {name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
+
// Atomic loads. These are just normal loads but return <value,memory> tuples
// so they can be properly ordered with other loads.
// load from arg0+auxint+aux. arg1=mem.
OpS390XFlagLT
OpS390XFlagGT
OpS390XFlagOV
+ OpS390XSYNC
OpS390XMOVBZatomicload
OpS390XMOVWZatomicload
OpS390XMOVDatomicload
argLen: 0,
reg: regInfo{},
},
+ {
+ name: "SYNC",
+ argLen: 1,
+ asm: s390x.ASYNC,
+ reg: regInfo{},
+ },
{
name: "MOVBZatomicload",
auxType: auxSymOff,
return rewriteValueS390X_OpAtomicLoad64_0(v)
case OpAtomicLoad8:
return rewriteValueS390X_OpAtomicLoad8_0(v)
+ case OpAtomicLoadAcq32:
+ return rewriteValueS390X_OpAtomicLoadAcq32_0(v)
case OpAtomicLoadPtr:
return rewriteValueS390X_OpAtomicLoadPtr_0(v)
case OpAtomicStore32:
return rewriteValueS390X_OpAtomicStore64_0(v)
case OpAtomicStorePtrNoWB:
return rewriteValueS390X_OpAtomicStorePtrNoWB_0(v)
+ case OpAtomicStoreRel32:
+ return rewriteValueS390X_OpAtomicStoreRel32_0(v)
case OpAvg64u:
return rewriteValueS390X_OpAvg64u_0(v)
case OpBitLen64:
return true
}
}
+func rewriteValueS390X_OpAtomicLoadAcq32_0(v *Value) bool {
+ // match: (AtomicLoadAcq32 ptr mem)
+ // cond:
+ // result: (MOVWZatomicload ptr mem)
+ for {
+ mem := v.Args[1]
+ ptr := v.Args[0]
+ v.reset(OpS390XMOVWZatomicload)
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+}
func rewriteValueS390X_OpAtomicLoadPtr_0(v *Value) bool {
// match: (AtomicLoadPtr ptr mem)
// cond:
}
}
func rewriteValueS390X_OpAtomicStore32_0(v *Value) bool {
+ b := v.Block
// match: (AtomicStore32 ptr val mem)
// cond:
- // result: (MOVWatomicstore ptr val mem)
+ // result: (SYNC (MOVWatomicstore ptr val mem))
for {
mem := v.Args[2]
ptr := v.Args[0]
val := v.Args[1]
- v.reset(OpS390XMOVWatomicstore)
- v.AddArg(ptr)
- v.AddArg(val)
- v.AddArg(mem)
+ v.reset(OpS390XSYNC)
+ v0 := b.NewValue0(v.Pos, OpS390XMOVWatomicstore, types.TypeMem)
+ v0.AddArg(ptr)
+ v0.AddArg(val)
+ v0.AddArg(mem)
+ v.AddArg(v0)
return true
}
}
func rewriteValueS390X_OpAtomicStore64_0(v *Value) bool {
+ b := v.Block
// match: (AtomicStore64 ptr val mem)
// cond:
- // result: (MOVDatomicstore ptr val mem)
+ // result: (SYNC (MOVDatomicstore ptr val mem))
for {
mem := v.Args[2]
ptr := v.Args[0]
val := v.Args[1]
- v.reset(OpS390XMOVDatomicstore)
- v.AddArg(ptr)
- v.AddArg(val)
- v.AddArg(mem)
+ v.reset(OpS390XSYNC)
+ v0 := b.NewValue0(v.Pos, OpS390XMOVDatomicstore, types.TypeMem)
+ v0.AddArg(ptr)
+ v0.AddArg(val)
+ v0.AddArg(mem)
+ v.AddArg(v0)
return true
}
}
func rewriteValueS390X_OpAtomicStorePtrNoWB_0(v *Value) bool {
+ b := v.Block
// match: (AtomicStorePtrNoWB ptr val mem)
// cond:
- // result: (MOVDatomicstore ptr val mem)
+ // result: (SYNC (MOVDatomicstore ptr val mem))
+ for {
+ mem := v.Args[2]
+ ptr := v.Args[0]
+ val := v.Args[1]
+ v.reset(OpS390XSYNC)
+ v0 := b.NewValue0(v.Pos, OpS390XMOVDatomicstore, types.TypeMem)
+ v0.AddArg(ptr)
+ v0.AddArg(val)
+ v0.AddArg(mem)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueS390X_OpAtomicStoreRel32_0(v *Value) bool {
+ // match: (AtomicStoreRel32 ptr val mem)
+ // cond:
+ // result: (MOVWatomicstore ptr val mem)
for {
mem := v.Args[2]
ptr := v.Args[0]
val := v.Args[1]
- v.reset(OpS390XMOVDatomicstore)
+ v.reset(OpS390XMOVWatomicstore)
v.AddArg(ptr)
v.AddArg(val)
v.AddArg(mem)
#include "textflag.h"
+// func Store(ptr *uint32, val uint32)
+TEXT ·Store(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVWZ val+8(FP), R3
+ MOVW R3, 0(R2)
+ SYNC
+ RET
+
+// func Store64(ptr *uint64, val uint64)
+TEXT ·Store64(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVD val+8(FP), R3
+ MOVD R3, 0(R2)
+ SYNC
+ RET
+
+// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+TEXT ·StorepNoWB(SB), NOSPLIT, $0
+ MOVD ptr+0(FP), R2
+ MOVD val+8(FP), R3
+ MOVD R3, 0(R2)
+ SYNC
+ RET
+
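To sanity-check the lowering end to end, a small probe (illustrative only, not part of this change; the file name, function names and expected instructions below are assumptions) can be cross-compiled for s390x and inspected with the compiler's -S listing. Under the rules added above, the sequentially consistent stores should appear as MOVW/MOVD followed by SYNC, while the load stays a plain atomic load.

// probe.go -- a tiny function to eyeball the s390x lowering, e.g. with:
//   GOOS=linux GOARCH=s390x go build -gcflags=-S probe.go
// (the exact listing depends on the compiler version)
package main

import "sync/atomic"

var flag uint32
var data uint64

//go:noinline
func publish(v uint64) {
	atomic.StoreUint64(&data, v) // expected: MOVD store followed by SYNC
	atomic.StoreUint32(&flag, 1) // expected: MOVW store followed by SYNC
}

func main() {
	publish(42)
	_ = atomic.LoadUint32(&flag) // expected: plain MOVWZ (tuple-typed atomic load)
}
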
// func Cas(ptr *uint32, old, new uint32) bool
// Atomically:
// if *ptr == old {
return *ptr
}
-//go:noinline
-//go:nosplit
-func Store(ptr *uint32, val uint32) {
- *ptr = val
-}
-
-//go:noinline
-//go:nosplit
-func Store64(ptr *uint64, val uint64) {
- *ptr = val
-}
+//go:noescape
+func Store(ptr *uint32, val uint32)
-//go:notinheap
-type noWB struct{}
+//go:noescape
+func Store64(ptr *uint64, val uint64)
// NO go:noescape annotation; see atomic_pointer.go.
-//go:noinline
-//go:nosplit
-func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
- *(**noWB)(ptr) = (*noWB)(val)
-}
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
-//go:noinline
//go:nosplit
+//go:noinline
func StoreRel(ptr *uint32, val uint32) {
*ptr = val
}
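
StoreRel can stay a plain store because release semantics only order it after earlier memory operations; it never has to be ordered before a later load, which is the one reordering the rules comment says the machine performs. The usual pairing is a store-release that publishes data which the consumer observes with a load-acquire (LoadAcq). The sketch below is illustrative only: it uses sync/atomic, whose operations are sequentially consistent and therefore stronger than the runtime-internal StoreRel/LoadAcq, but the ordering it relies on is exactly the release/acquire guarantee.

// Publication via a release/acquire-style pairing, sketched with sync/atomic.
// The producer's final store plays the role of StoreRel: everything written
// before it must be visible to a consumer that observes ready == 1 through a
// LoadAcq-style load. No store-load barrier is needed for that guarantee.
package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

var (
	payload int32  // written before the flag, read after observing it
	ready   uint32 // the published flag
)

func producer() {
	payload = 42                  // ordinary store, ordered before the release store
	atomic.StoreUint32(&ready, 1) // plays the role of StoreRel: publishes payload
}

func consumer() int32 {
	for atomic.LoadUint32(&ready) == 0 { // plays the role of LoadAcq
		runtime.Gosched()
	}
	return payload // guaranteed to observe 42
}

func main() {
	go producer()
	fmt.Println(consumer())
}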