cmd/compile, runtime: add new lightweight atomics for ppc64x
author     Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
           Mon, 6 Aug 2018 20:36:16 +0000 (15:36 -0500)
committer  Lynn Boger <laboger@linux.vnet.ibm.com>
           Tue, 23 Oct 2018 18:10:38 +0000 (18:10 +0000)
This change creates the infrastructure for new lightweight atomic
primitives in runtime/internal/atomic:

- LoadAcq, for load-acquire
- StoreRel, for store-release
- CasRel, for compare-and-swap-release

and implements them for ppc64x. There is a visible performance improvement
in producer-consumer scenarios such as BenchmarkChanProdCons*:

benchmark                           old ns/op     new ns/op     delta
BenchmarkChanProdCons0-48           2034          2034          +0.00%
BenchmarkChanProdCons10-48          1798          1608          -10.57%
BenchmarkChanProdCons100-48         1596          1585          -0.69%
BenchmarkChanProdConsWork0-48       2084          2046          -1.82%
BenchmarkChanProdConsWork10-48      1829          1668          -8.80%
BenchmarkChanProdConsWork100-48     1650          1650          +0.00%
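
These primitives target the publish/consume pattern used by the scheduler's
run queues (see the proc.go hunks at the end of this diff). The sketch below
mirrors that pattern. runtime/internal/atomic is not importable from user
code, so loadAcq, storeRel and casRel here are hypothetical wrappers over
fully fenced sync/atomic calls, which is exactly the fallback this change
installs for every non-ppc64x architecture via alias(). On ppc64x the real
primitives emit lighter sequences (LWSYNC for release; a load followed by a
dependent compare and branch for acquire), which is where the deltas above
come from.

// Hedged sketch, not runtime code: the three wrappers stand in for the
// runtime-internal LoadAcq, StoreRel and CasRel.
package main

import (
	"fmt"
	"sync/atomic"
)

func loadAcq(p *uint32) uint32               { return atomic.LoadUint32(p) }
func storeRel(p *uint32, v uint32)           { atomic.StoreUint32(p, v) }
func casRel(p *uint32, old, new uint32) bool { return atomic.CompareAndSwapUint32(p, old, new) }

// ring is a fixed-size single-producer, multi-consumer queue in the style
// of p.runq in proc.go.
type ring struct {
	head uint32 // advanced by consumers with casRel
	tail uint32 // published by the producer with storeRel
	buf  [256]uint32
}

// put appends v; only one goroutine may call it.
func (r *ring) put(v uint32) bool {
	h := loadAcq(&r.head) // load-acquire, synchronize with consumers
	t := r.tail           // only the producer writes tail
	if t-h >= uint32(len(r.buf)) {
		return false // full
	}
	r.buf[t%uint32(len(r.buf))] = v
	storeRel(&r.tail, t+1) // store-release, makes the item visible
	return true
}

// get removes one item; any number of goroutines may call it.
func (r *ring) get() (uint32, bool) {
	for {
		h := loadAcq(&r.head) // load-acquire, synchronize with other consumers
		t := loadAcq(&r.tail) // load-acquire, synchronize with the producer
		if t == h {
			return 0, false // empty
		}
		v := r.buf[h%uint32(len(r.buf))]
		if casRel(&r.head, h, h+1) { // cas-release, commits the consume
			return v, true
		}
	}
}

func main() {
	var r ring
	r.put(42)
	fmt.Println(r.get()) // 42 true
}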

Fixes #21348

Change-Id: I1f6ce377e4a0fe4bd7f5f775e8036f50070ad8db
Reviewed-on: https://go-review.googlesource.com/c/142277
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
29 files changed:
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/runtime/internal/atomic/asm_386.s
src/runtime/internal/atomic/asm_amd64.s
src/runtime/internal/atomic/asm_amd64p32.s
src/runtime/internal/atomic/asm_arm.s
src/runtime/internal/atomic/asm_arm64.s
src/runtime/internal/atomic/asm_mips64x.s
src/runtime/internal/atomic/asm_mipsx.s
src/runtime/internal/atomic/asm_ppc64x.s
src/runtime/internal/atomic/asm_s390x.s
src/runtime/internal/atomic/atomic_386.go
src/runtime/internal/atomic/atomic_amd64x.go
src/runtime/internal/atomic/atomic_arm.go
src/runtime/internal/atomic/atomic_arm64.go
src/runtime/internal/atomic/atomic_arm64.s
src/runtime/internal/atomic/atomic_mips64x.go
src/runtime/internal/atomic/atomic_mips64x.s
src/runtime/internal/atomic/atomic_mipsx.go
src/runtime/internal/atomic/atomic_ppc64x.go
src/runtime/internal/atomic/atomic_ppc64x.s
src/runtime/internal/atomic/atomic_s390x.go
src/runtime/internal/atomic/atomic_wasm.go
src/runtime/proc.go

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 2ce59097af36a4c9b063ca42ce2707991880d9be..303658a3e118b10deca4d4a5bc4bc1e1484ed406 100644 (file)
@@ -2862,6 +2862,7 @@ func init() {
        var all []*sys.Arch
        var p4 []*sys.Arch
        var p8 []*sys.Arch
+       var lwatomics []*sys.Arch
        for _, a := range sys.Archs {
                all = append(all, a)
                if a.PtrSize == 4 {
@@ -2869,6 +2870,9 @@ func init() {
                } else {
                        p8 = append(p8, a)
                }
+               if a.Family != sys.PPC64 {
+                       lwatomics = append(lwatomics, a)
+               }
        }
 
        // add adds the intrinsic b for pkg.fn for the given list of architectures.
@@ -2985,6 +2989,13 @@ func init() {
                        return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
                },
                sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS64, sys.PPC64)
+       addF("runtime/internal/atomic", "LoadAcq",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[TUINT32], types.TypeMem), args[0], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+                       return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
+               },
+               sys.PPC64)
        addF("runtime/internal/atomic", "Loadp",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
@@ -3011,6 +3022,12 @@ func init() {
                        return nil
                },
                sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.MIPS64)
+       addF("runtime/internal/atomic", "StoreRel",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       s.vars[&memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
+                       return nil
+               },
+               sys.PPC64)
 
        addF("runtime/internal/atomic", "Xchg",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -3098,6 +3115,13 @@ func init() {
                        return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
                },
                sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS64, sys.PPC64)
+       addF("runtime/internal/atomic", "CasRel",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+                       return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
+               },
+               sys.PPC64)
 
        addF("runtime/internal/atomic", "And8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -3118,8 +3142,10 @@ func init() {
        alias("runtime/internal/atomic", "Loaduint", "runtime/internal/atomic", "Load64", p8...)
        alias("runtime/internal/atomic", "Loaduintptr", "runtime/internal/atomic", "Load", p4...)
        alias("runtime/internal/atomic", "Loaduintptr", "runtime/internal/atomic", "Load64", p8...)
+       alias("runtime/internal/atomic", "LoadAcq", "runtime/internal/atomic", "Load", lwatomics...)
        alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store", p4...)
        alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store64", p8...)
+       alias("runtime/internal/atomic", "StoreRel", "runtime/internal/atomic", "Store", lwatomics...)
        alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg", p4...)
        alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg64", p8...)
        alias("runtime/internal/atomic", "Xadduintptr", "runtime/internal/atomic", "Xadd", p4...)
@@ -3128,6 +3154,7 @@ func init() {
        alias("runtime/internal/atomic", "Casuintptr", "runtime/internal/atomic", "Cas64", p8...)
        alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
        alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
+       alias("runtime/internal/atomic", "CasRel", "runtime/internal/atomic", "Cas", lwatomics...)
 
        alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
 
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
index bd6ffbce53a855ec8cb671761b5b423f2c1f86c4..a3f8b67177d56504ae677661f1f0aa182de77f1b 100644 (file)
@@ -313,9 +313,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                }
                arg0 := v.Args[0].Reg()
                out := v.Reg0()
-               // SYNC
-               psync := s.Prog(ppc64.ASYNC)
-               psync.To.Type = obj.TYPE_NONE
+               // SYNC when AuxInt == 1; otherwise, load-acquire
+               if v.AuxInt == 1 {
+                       psync := s.Prog(ppc64.ASYNC)
+                       psync.To.Type = obj.TYPE_NONE
+               }
                // Load
                p := s.Prog(ld)
                p.From.Type = obj.TYPE_MEM
@@ -338,7 +340,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 
        case ssa.OpPPC64LoweredAtomicStore32,
                ssa.OpPPC64LoweredAtomicStore64:
-               // SYNC
+               // SYNC or LWSYNC
                // MOVD/MOVW arg1,(arg0)
                st := ppc64.AMOVD
                if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
@@ -346,8 +348,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                }
                arg0 := v.Args[0].Reg()
                arg1 := v.Args[1].Reg()
+               // If AuxInt == 0, LWSYNC (Store-Release), else SYNC
                // SYNC
-               psync := s.Prog(ppc64.ASYNC)
+               syncOp := ppc64.ASYNC
+               if v.AuxInt == 0 {
+                       syncOp = ppc64.ALWSYNC
+               }
+               psync := s.Prog(syncOp)
                psync.To.Type = obj.TYPE_NONE
                // Store
                p := s.Prog(st)
@@ -360,12 +367,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpPPC64LoweredAtomicCas32:
                // LWSYNC
                // loop:
-               // LDAR        (Rarg0), Rtmp
+               // LDAR        (Rarg0), MutexHint, Rtmp
                // CMP         Rarg1, Rtmp
                // BNE         fail
                // STDCCC      Rarg2, (Rarg0)
                // BNE         loop
-               // LWSYNC
+               // LWSYNC      // Only for sequential consistency; not required in CasRel.
                // MOVD        $1, Rout
                // BR          end
                // fail:
@@ -393,6 +400,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Reg = r0
                p.To.Type = obj.TYPE_REG
                p.To.Reg = ppc64.REGTMP
+               // If it is a Compare-and-Swap-Release operation, set the EH field with
+               // the release hint.
+               if v.AuxInt == 0 {
+                       p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
+               }
                // CMP reg1,reg2
                p1 := s.Prog(cmp)
                p1.From.Type = obj.TYPE_REG
@@ -414,8 +426,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                gc.Patch(p4, p)
                // LWSYNC - Assuming shared data not write-through-required nor
                // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
-               plwsync2 := s.Prog(ppc64.ALWSYNC)
-               plwsync2.To.Type = obj.TYPE_NONE
+               // If the operation is a CAS-Release, then synchronization is not necessary.
+               if v.AuxInt != 0 {
+                       plwsync2 := s.Prog(ppc64.ALWSYNC)
+                       plwsync2.To.Type = obj.TYPE_NONE
+               }
                // return true
                p5 := s.Prog(ppc64.AMOVD)
                p5.From.Type = obj.TYPE_CONST
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index be1bd6de0bf562c332e889131a18076df91a842c..0eaa88596ba96b5ba4def326a00f14752ffca49b 100644 (file)
     (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
 
 // atomic intrinsics
-(AtomicLoad(32|64|Ptr)  ptr mem) -> (LoweredAtomicLoad(32|64|Ptr) ptr mem)
+(AtomicLoad(32|64|Ptr)  ptr mem) -> (LoweredAtomicLoad(32|64|Ptr) [1] ptr mem)
+(AtomicLoadAcq32        ptr mem) -> (LoweredAtomicLoad32 [0] ptr mem)
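+// In the lowered ops, AuxInt selects the ordering: 1 asks for the fully
+// fenced (sequentially consistent) form, 0 for acquire (loads) or
+// release (stores and CAS).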
 
-(AtomicStore(32|64)      ptr val mem) -> (LoweredAtomicStore(32|64) ptr val mem)
+(AtomicStore(32|64)      ptr val mem) -> (LoweredAtomicStore(32|64) [1] ptr val mem)
+(AtomicStoreRel32        ptr val mem) -> (LoweredAtomicStore32 [0] ptr val mem)
 //(AtomicStorePtrNoWB ptr val mem) -> (STLR  ptr val mem)
 
 (AtomicExchange(32|64) ptr val mem) -> (LoweredAtomicExchange(32|64) ptr val mem)
 
 (AtomicAdd(32|64) ptr val mem) -> (LoweredAtomicAdd(32|64) ptr val mem)
 
-(AtomicCompareAndSwap(32|64) ptr old new_ mem) -> (LoweredAtomicCas(32|64) ptr old new_ mem)
+(AtomicCompareAndSwap(32|64) ptr old new_ mem) -> (LoweredAtomicCas(32|64) [1] ptr old new_ mem)
+(AtomicCompareAndSwapRel32   ptr old new_ mem) -> (LoweredAtomicCas32 [0] ptr old new_ mem)
 
 (AtomicAnd8 ptr val mem) -> (LoweredAtomicAnd8 ptr val mem)
 (AtomicOr8  ptr val mem) -> (LoweredAtomicOr8  ptr val mem)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index c82f7312fe6d9b7a9904d0bd989e0f9dc5099ed9..ef0db69fb71303cb16b2f966a735a22d0d3079ae 100644 (file)
@@ -470,12 +470,12 @@ func init() {
                        faultOnNilArg1: true,
                },
 
-               {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", faultOnNilArg0: true, hasSideEffects: true},
-               {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
 
-               {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", clobberFlags: true, faultOnNilArg0: true},
-               {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", clobberFlags: true, faultOnNilArg0: true},
-               {name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", clobberFlags: true, faultOnNilArg0: true},
+               {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
+               {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
+               {name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 
                // atomic add32, 64
                // SYNC
@@ -516,8 +516,8 @@ func init() {
                // BNE          -4(PC)
                // CBNZ         Rtmp, -4(PC)
                // CSET         EQ, Rout
-               {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
-               {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 
                // atomic 8 and/or.
                // *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 2f28ed45d076e1595542165772bf0c60fb697548..7ff6da1b01f26b3f43edbed0c5bc3ae5d8539a2d 100644 (file)
@@ -512,20 +512,23 @@ var genericOps = []opData{
        // Atomic loads return a new memory so that the loads are properly ordered
        // with respect to other loads and stores.
        // TODO: use for sync/atomic at some point.
-       {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"},                               // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
-       {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"},                               // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
-       {name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"},                             // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
-       {name: "AtomicStore32", argLength: 3, typ: "Mem", hasSideEffects: true},                 // Store arg1 to *arg0.  arg2=memory.  Returns memory.
-       {name: "AtomicStore64", argLength: 3, typ: "Mem", hasSideEffects: true},                 // Store arg1 to *arg0.  arg2=memory.  Returns memory.
-       {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem", hasSideEffects: true},            // Store arg1 to *arg0.  arg2=memory.  Returns memory.
-       {name: "AtomicExchange32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},     // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
-       {name: "AtomicExchange64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},     // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
-       {name: "AtomicAdd32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},          // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
-       {name: "AtomicAdd64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},          // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
-       {name: "AtomicCompareAndSwap32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2.  Returns true iff store happens and new memory.
-       {name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2.  Returns true iff store happens and new memory.
-       {name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1.  arg2=memory.  Returns memory.
-       {name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1.  arg2=memory.  Returns memory.
+       {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"},                                  // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"},                                  // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"},                                // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoadAcq32", argLength: 2, typ: "(UInt32,Mem)"},                               // Load from arg0.  arg1=memory.  Lock acquisition, returns loaded value and new memory.
+       {name: "AtomicStore32", argLength: 3, typ: "Mem", hasSideEffects: true},                    // Store arg1 to *arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStore64", argLength: 3, typ: "Mem", hasSideEffects: true},                    // Store arg1 to *arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem", hasSideEffects: true},               // Store arg1 to *arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStoreRel32", argLength: 3, typ: "Mem", hasSideEffects: true},                 // Store arg1 to *arg0.  arg2=memory.  Lock release, returns memory.
+       {name: "AtomicExchange32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},        // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
+       {name: "AtomicExchange64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},        // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
+       {name: "AtomicAdd32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},             // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicAdd64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},             // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicCompareAndSwap32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true},    // if *arg0==arg1, then set *arg0=arg2.  Returns true if store happens and new memory.
+       {name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true},    // if *arg0==arg1, then set *arg0=arg2.  Returns true if store happens and new memory.
+       {name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2.  Lock release, returns true if store happens and new memory.
+       {name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true},                       // *arg0 &= arg1.  arg2=memory.  Returns memory.
+       {name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true},                        // *arg0 |= arg1.  arg2=memory.  Returns memory.
 
        // Atomic operation variants
        // These variants have the same semantics as above atomic operations.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index ad6c151d1dc83c584d10b668d6db5da1e39097bc..1435caf26a6ab4bc48c01a806716ef5f470688c7 100644 (file)
@@ -2409,15 +2409,18 @@ const (
        OpAtomicLoad32
        OpAtomicLoad64
        OpAtomicLoadPtr
+       OpAtomicLoadAcq32
        OpAtomicStore32
        OpAtomicStore64
        OpAtomicStorePtrNoWB
+       OpAtomicStoreRel32
        OpAtomicExchange32
        OpAtomicExchange64
        OpAtomicAdd32
        OpAtomicAdd64
        OpAtomicCompareAndSwap32
        OpAtomicCompareAndSwap64
+       OpAtomicCompareAndSwapRel32
        OpAtomicAnd8
        OpAtomicOr8
        OpAtomicAdd32Variant
@@ -23449,6 +23452,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredAtomicStore32",
+               auxType:        auxInt64,
                argLen:         3,
                faultOnNilArg0: true,
                hasSideEffects: true,
@@ -23461,6 +23465,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredAtomicStore64",
+               auxType:        auxInt64,
                argLen:         3,
                faultOnNilArg0: true,
                hasSideEffects: true,
@@ -23473,6 +23478,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredAtomicLoad32",
+               auxType:        auxInt64,
                argLen:         2,
                clobberFlags:   true,
                faultOnNilArg0: true,
@@ -23487,6 +23493,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredAtomicLoad64",
+               auxType:        auxInt64,
                argLen:         2,
                clobberFlags:   true,
                faultOnNilArg0: true,
@@ -23501,6 +23508,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredAtomicLoadPtr",
+               auxType:        auxInt64,
                argLen:         2,
                clobberFlags:   true,
                faultOnNilArg0: true,
@@ -23583,6 +23591,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:            "LoweredAtomicCas64",
+               auxType:         auxInt64,
                argLen:          4,
                resultNotInArgs: true,
                clobberFlags:    true,
@@ -23601,6 +23610,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:            "LoweredAtomicCas32",
+               auxType:         auxInt64,
                argLen:          4,
                resultNotInArgs: true,
                clobberFlags:    true,
@@ -29701,6 +29711,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "AtomicLoadAcq32",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:           "AtomicStore32",
                argLen:         3,
@@ -29719,6 +29734,12 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:           "AtomicStoreRel32",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
        {
                name:           "AtomicExchange32",
                argLen:         3,
@@ -29755,6 +29776,12 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:           "AtomicCompareAndSwapRel32",
+               argLen:         4,
+               hasSideEffects: true,
+               generic:        true,
+       },
        {
                name:           "AtomicAnd8",
                argLen:         3,
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 8f100c1a38c1869518100bd651681e12e1ef0cce..dad036d3d26f5231986c97ea97592c233ccd29b4 100644 (file)
@@ -53,6 +53,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpAtomicCompareAndSwap32_0(v)
        case OpAtomicCompareAndSwap64:
                return rewriteValuePPC64_OpAtomicCompareAndSwap64_0(v)
+       case OpAtomicCompareAndSwapRel32:
+               return rewriteValuePPC64_OpAtomicCompareAndSwapRel32_0(v)
        case OpAtomicExchange32:
                return rewriteValuePPC64_OpAtomicExchange32_0(v)
        case OpAtomicExchange64:
@@ -61,6 +63,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpAtomicLoad32_0(v)
        case OpAtomicLoad64:
                return rewriteValuePPC64_OpAtomicLoad64_0(v)
+       case OpAtomicLoadAcq32:
+               return rewriteValuePPC64_OpAtomicLoadAcq32_0(v)
        case OpAtomicLoadPtr:
                return rewriteValuePPC64_OpAtomicLoadPtr_0(v)
        case OpAtomicOr8:
@@ -69,6 +73,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpAtomicStore32_0(v)
        case OpAtomicStore64:
                return rewriteValuePPC64_OpAtomicStore64_0(v)
+       case OpAtomicStoreRel32:
+               return rewriteValuePPC64_OpAtomicStoreRel32_0(v)
        case OpAvg64u:
                return rewriteValuePPC64_OpAvg64u_0(v)
        case OpBitLen32:
@@ -931,7 +937,7 @@ func rewriteValuePPC64_OpAtomicAnd8_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicCompareAndSwap32_0(v *Value) bool {
        // match: (AtomicCompareAndSwap32 ptr old new_ mem)
        // cond:
-       // result: (LoweredAtomicCas32 ptr old new_ mem)
+       // result: (LoweredAtomicCas32 [1] ptr old new_ mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
@@ -939,6 +945,7 @@ func rewriteValuePPC64_OpAtomicCompareAndSwap32_0(v *Value) bool {
                new_ := v.Args[2]
                mem := v.Args[3]
                v.reset(OpPPC64LoweredAtomicCas32)
+               v.AuxInt = 1
                v.AddArg(ptr)
                v.AddArg(old)
                v.AddArg(new_)
@@ -949,7 +956,7 @@ func rewriteValuePPC64_OpAtomicCompareAndSwap32_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicCompareAndSwap64_0(v *Value) bool {
        // match: (AtomicCompareAndSwap64 ptr old new_ mem)
        // cond:
-       // result: (LoweredAtomicCas64 ptr old new_ mem)
+       // result: (LoweredAtomicCas64 [1] ptr old new_ mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
@@ -957,6 +964,26 @@ func rewriteValuePPC64_OpAtomicCompareAndSwap64_0(v *Value) bool {
                new_ := v.Args[2]
                mem := v.Args[3]
                v.reset(OpPPC64LoweredAtomicCas64)
+               v.AuxInt = 1
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValuePPC64_OpAtomicCompareAndSwapRel32_0(v *Value) bool {
+       // match: (AtomicCompareAndSwapRel32 ptr old new_ mem)
+       // cond:
+       // result: (LoweredAtomicCas32 [0] ptr old new_ mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpPPC64LoweredAtomicCas32)
+               v.AuxInt = 0
                v.AddArg(ptr)
                v.AddArg(old)
                v.AddArg(new_)
@@ -999,12 +1026,13 @@ func rewriteValuePPC64_OpAtomicExchange64_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicLoad32_0(v *Value) bool {
        // match: (AtomicLoad32 ptr mem)
        // cond:
-       // result: (LoweredAtomicLoad32 ptr mem)
+       // result: (LoweredAtomicLoad32 [1] ptr mem)
        for {
                _ = v.Args[1]
                ptr := v.Args[0]
                mem := v.Args[1]
                v.reset(OpPPC64LoweredAtomicLoad32)
+               v.AuxInt = 1
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
@@ -1013,12 +1041,28 @@ func rewriteValuePPC64_OpAtomicLoad32_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicLoad64_0(v *Value) bool {
        // match: (AtomicLoad64 ptr mem)
        // cond:
-       // result: (LoweredAtomicLoad64 ptr mem)
+       // result: (LoweredAtomicLoad64 [1] ptr mem)
        for {
                _ = v.Args[1]
                ptr := v.Args[0]
                mem := v.Args[1]
                v.reset(OpPPC64LoweredAtomicLoad64)
+               v.AuxInt = 1
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValuePPC64_OpAtomicLoadAcq32_0(v *Value) bool {
+       // match: (AtomicLoadAcq32 ptr mem)
+       // cond:
+       // result: (LoweredAtomicLoad32 [0] ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpPPC64LoweredAtomicLoad32)
+               v.AuxInt = 0
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
@@ -1027,12 +1071,13 @@ func rewriteValuePPC64_OpAtomicLoad64_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicLoadPtr_0(v *Value) bool {
        // match: (AtomicLoadPtr ptr mem)
        // cond:
-       // result: (LoweredAtomicLoadPtr ptr mem)
+       // result: (LoweredAtomicLoadPtr [1] ptr mem)
        for {
                _ = v.Args[1]
                ptr := v.Args[0]
                mem := v.Args[1]
                v.reset(OpPPC64LoweredAtomicLoadPtr)
+               v.AuxInt = 1
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
@@ -1057,13 +1102,14 @@ func rewriteValuePPC64_OpAtomicOr8_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicStore32_0(v *Value) bool {
        // match: (AtomicStore32 ptr val mem)
        // cond:
-       // result: (LoweredAtomicStore32 ptr val mem)
+       // result: (LoweredAtomicStore32 [1] ptr val mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
                val := v.Args[1]
                mem := v.Args[2]
                v.reset(OpPPC64LoweredAtomicStore32)
+               v.AuxInt = 1
                v.AddArg(ptr)
                v.AddArg(val)
                v.AddArg(mem)
@@ -1073,13 +1119,31 @@ func rewriteValuePPC64_OpAtomicStore32_0(v *Value) bool {
 func rewriteValuePPC64_OpAtomicStore64_0(v *Value) bool {
        // match: (AtomicStore64 ptr val mem)
        // cond:
-       // result: (LoweredAtomicStore64 ptr val mem)
+       // result: (LoweredAtomicStore64 [1] ptr val mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
                val := v.Args[1]
                mem := v.Args[2]
                v.reset(OpPPC64LoweredAtomicStore64)
+               v.AuxInt = 1
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValuePPC64_OpAtomicStoreRel32_0(v *Value) bool {
+       // match: (AtomicStoreRel32 ptr val mem)
+       // cond:
+       // result: (LoweredAtomicStore32 [0] ptr val mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpPPC64LoweredAtomicStore32)
+               v.AuxInt = 0
                v.AddArg(ptr)
                v.AddArg(val)
                v.AddArg(mem)
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 86a3ef33b9b3a138b06dcda89e89f5c2f82f12c3..13289a88d068b7e2abed9e1ae0f204f3e64a1511 100644 (file)
@@ -23,6 +23,9 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13
 TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-13
        JMP     runtime∕internal∕atomic·Cas(SB)
 
+TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-13
+       JMP     runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-8
        JMP     runtime∕internal∕atomic·Load(SB)
 
@@ -180,6 +183,9 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8
        XCHGL   AX, 0(BX)
        RET
 
+TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8
+       JMP     runtime∕internal∕atomic·Store(SB)
+
 // uint64 atomicload64(uint64 volatile* addr);
 TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12
        MOVL    ptr+0(FP), AX
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 6fb5211c9cedd225d3e4b472406e5ad936b6f5c5..e18aee7d59e3106f211ac11c645b3b877257ba2e 100644 (file)
@@ -43,6 +43,9 @@ TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-25
 TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25
        JMP     runtime∕internal∕atomic·Cas64(SB)
 
+TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-17
+       JMP     runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-16
        JMP     runtime∕internal∕atomic·Load64(SB)
 
@@ -130,6 +133,9 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
        XCHGL   AX, 0(BX)
        RET
 
+TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
+       JMP     runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
        MOVQ    ptr+0(FP), BX
        MOVQ    val+8(FP), AX
diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s
index ff590e601b2320e6c959fdc2dd5d2ee30753023e..35b5ef205ec2a0d97a2951d026ad165c5c8db334 100644 (file)
@@ -23,6 +23,9 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-17
 TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-17
        JMP     runtime∕internal∕atomic·Cas(SB)
 
+TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-17
+       JMP     runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-12
        JMP     runtime∕internal∕atomic·Load(SB)
 
@@ -130,6 +133,9 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8
        XCHGL   AX, 0(BX)
        RET
 
+TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8
+       JMP     runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
        MOVL    ptr+0(FP), BX
        MOVQ    val+8(FP), AX
diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s
index 09724c1c34a62c70402c103cafac6551c7b5ca85..d4ef11560e7733d3ebc0afa5fbede7d2a852f01e 100644 (file)
@@ -53,12 +53,18 @@ casfail:
 TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
        B runtime∕internal∕atomic·Load(SB)
 
+TEXT runtime∕internal∕atomic·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
+       B runtime∕internal∕atomic·Load(SB)
+
 TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
        B       runtime∕internal∕atomic·Cas(SB)
 
 TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
        B       runtime∕internal∕atomic·Cas(SB)
 
+TEXT runtime∕internal∕atomic·CasRel(SB),NOSPLIT,$0-13
+       B       runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
        B       runtime∕internal∕atomic·Load(SB)
 
@@ -71,6 +77,9 @@ TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
 TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
        B       runtime∕internal∕atomic·Store(SB)
 
+TEXT runtime∕internal∕atomic·StoreRel(SB),NOSPLIT,$0-8
+       B       runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
        B       runtime∕internal∕atomic·Xadd(SB)
 
diff --git a/src/runtime/internal/atomic/asm_arm64.s b/src/runtime/internal/atomic/asm_arm64.s
index 56b89a5a0b038e52f88c6457daf7277f083a9abc..8336a859ad03df23e8d479b59fe284d92e76f29c 100644 (file)
@@ -29,6 +29,9 @@ ok:
 TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25
        B       runtime∕internal∕atomic·Cas64(SB)
 
+TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-17
+       B       runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-16
        B       runtime∕internal∕atomic·Load64(SB)
 
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 19d131e5a61d2c76ff3384573b3a4f944c182ed5..9cb10371b7a48427078828752eb9ad5bb0aa5da1 100644 (file)
@@ -62,6 +62,9 @@ cas64_fail:
 TEXT ·Casuintptr(SB), NOSPLIT, $0-25
        JMP     ·Cas64(SB)
 
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+       JMP     ·Cas(SB)
+
 TEXT ·Loaduintptr(SB),  NOSPLIT|NOFRAME, $0-16
        JMP     ·Load64(SB)
 
@@ -152,6 +155,9 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
        JMP     ·Store64(SB)
 
+TEXT ·StoreRel(SB), NOSPLIT, $0-12
+       JMP     ·Store(SB)
+
 TEXT ·Store(SB), NOSPLIT, $0-12
        MOVV    ptr+0(FP), R1
        MOVW    val+8(FP), R2
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index 30550fd02e8d196b286fa15d358f58ce8515de63..73d7ea3ad4d6f40ea433c9ce71fe8995e0d78cc4 100644 (file)
@@ -70,6 +70,9 @@ try_xchg:
 TEXT ·Casuintptr(SB),NOSPLIT,$0-13
        JMP     ·Cas(SB)
 
+TEXT ·CasRel(SB),NOSPLIT,$0-13
+       JMP     ·Cas(SB)
+
 TEXT ·Loaduintptr(SB),NOSPLIT,$0-8
        JMP     ·Load(SB)
 
@@ -100,6 +103,9 @@ TEXT ·Xchguintptr(SB),NOSPLIT,$0-12
 TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
        JMP     ·Store(SB)
 
+TEXT ·StoreRel(SB),NOSPLIT,$0-8
+       JMP     ·Store(SB)
+
 // void        Or8(byte volatile*, byte);
 TEXT ·Or8(SB),NOSPLIT,$0-5
        MOVW    ptr+0(FP), R1
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index a2ed4adc91a0332bf95ed0ee349e61d9a90394e4..052b031cfbacef01db9f3717d607c4738bee6385 100644 (file)
@@ -59,6 +59,24 @@ cas64_fail:
        MOVB    R0, ret+24(FP)
        RET
 
+TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-17
+       MOVD    ptr+0(FP), R3
+       MOVWZ   old+8(FP), R4
+       MOVWZ   new+12(FP), R5
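+       // LWSYNC below is the release barrier; unlike Cas, no barrier
+       // follows the successful store, so CasRel provides no acquire
+       // semantics.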
+       LWSYNC
+cas_again:
+       LWAR    (R3), $0, R6        // 0 = Mutex release hint
+       CMPW    R6, R4
+       BNE     cas_fail
+       STWCCC  R5, (R3)
+       BNE     cas_again
+       MOVD    $1, R3
+       MOVB    R3, ret+16(FP)
+       RET
+cas_fail:
+       MOVB    R0, ret+16(FP)
+       RET
+
 TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25
        BR      runtime∕internal∕atomic·Cas64(SB)
 
@@ -159,6 +177,13 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
        MOVD    R4, 0(R3)
        RET
 
+TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
+       MOVD    ptr+0(FP), R3
+       MOVW    val+8(FP), R4
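+       // LWSYNC orders every prior memory operation before the store below
+       // (release); the sequentially consistent Store in this file uses the
+       // heavier SYNC instead.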
+       LWSYNC
+       MOVW    R4, 0(R3)
+       RET
+
 // void runtime∕internal∕atomic·Or8(byte volatile*, byte);
 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R3
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index e25703e0772a3fa3f059ecfb33ee4fec1ecb62f9..512fde5a12465cec8c5e4058b86b217236b15a95 100644 (file)
@@ -48,6 +48,10 @@ cas64_fail:
 TEXT ·Casuintptr(SB), NOSPLIT, $0-25
        BR      ·Cas64(SB)
 
+// func CasRel(ptr *uint32, old, new uint32) bool
+TEXT ·CasRel(SB), NOSPLIT, $0-17
+       BR      ·Cas(SB)
+
 // func Loaduintptr(ptr *uintptr) uintptr
 TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
        BR      ·Load64(SB)
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 4284d2bd7d2408e4597e67830079eef39a41d088..ad71ebd971a7b70385045809226a9f186ebd856e 100644 (file)
@@ -20,6 +20,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
        return *(*unsafe.Pointer)(ptr)
 }
 
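+// On x86 every load already carries acquire semantics (loads are not
+// reordered against other loads), so a plain load suffices; //go:noinline
+// makes the accessor behave as a compiler barrier.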
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+       return *ptr
+}
+
 //go:noescape
 func Xadd64(ptr *uint64, delta int64) uint64
 
@@ -52,11 +58,17 @@ func Or8(ptr *uint8, val uint8)
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
 //go:noescape
 func Store64(ptr *uint64, val uint64)
 
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_amd64x.go b/src/runtime/internal/atomic/atomic_amd64x.go
index 54851d30f4575267f766e7f2fd3dc4ceb4d5cf8b..d4fe4616099888ce63fbe45fdafd70e279d520de 100644 (file)
@@ -26,6 +26,12 @@ func Load64(ptr *uint64) uint64 {
        return *ptr
 }
 
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+       return *ptr
+}
+
 //go:noescape
 func Xadd(ptr *uint32, delta int32) uint32
 
@@ -55,12 +61,18 @@ func Or8(ptr *uint8, val uint8)
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
 //go:noescape
 func Store64(ptr *uint64, val uint64)
 
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
 // StorepNoWB performs *ptr = val atomically and without a write
 // barrier.
 //
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index 1ecdb11db96b3dff547b519fb1ca494c3c736caf..51b42ba23846b5625cb7fdd78b0c217bf4a0df5f 100644 (file)
@@ -74,6 +74,9 @@ func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer)
 //go:noescape
 func Store(addr *uint32, v uint32)
 
+//go:noescape
+func StoreRel(addr *uint32, v uint32)
+
 //go:nosplit
 func goCas64(addr *uint64, old, new uint64) bool {
        if uintptr(unsafe.Pointer(addr))&7 != 0 {
@@ -181,9 +184,15 @@ func Load(addr *uint32) uint32
 //go:noescape
 func Loadp(addr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func LoadAcq(addr *uint32) uint32
+
 //go:noescape
 func Cas64(addr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(addr *uint32, old, new uint32) bool
+
 //go:noescape
 func Xadd64(addr *uint64, delta int64) uint64
 
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 3554b7f23674c2cacf71ba232e84e8e5df7dce7d..a2da27e7ed4eb8f9d7dd5ff709eea8e029270935 100644 (file)
@@ -35,6 +35,9 @@ func Load64(ptr *uint64) uint64
 //go:noescape
 func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func LoadAcq(addr *uint32) uint32
+
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
@@ -44,6 +47,9 @@ func And8(ptr *uint8, val uint8)
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
@@ -52,3 +58,6 @@ func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index 354fd1e94bb7bccb010a6faf640db5af27df18c3..c979f2246f516ddd211a71d0d274b77cc45f7c51 100644 (file)
@@ -25,9 +25,16 @@ TEXT ·Loadp(SB),NOSPLIT,$0-16
        MOVD    R0, ret+8(FP)
        RET
 
+// uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* addr)
+TEXT ·LoadAcq(SB),NOSPLIT,$0-12
+       B       ·Load(SB)
+
 TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
        B       runtime∕internal∕atomic·Store64(SB)
 
+TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
+       B       runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index d06ea4809a7311a6ba8a702678be293254ab9f39..98a8fca929186c5d338fe19a7138206b9fcdd87d 100644 (file)
@@ -35,6 +35,9 @@ func Load64(ptr *uint64) uint64
 //go:noescape
 func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
@@ -46,6 +49,9 @@ func Or8(ptr *uint8, val uint8)
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
@@ -54,3 +60,6 @@ func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
diff --git a/src/runtime/internal/atomic/atomic_mips64x.s b/src/runtime/internal/atomic/atomic_mips64x.s
index 087672f5ccf46ac6dce8b5fbc1d6b7df3b39de10..5214afe2d6753d291e47d9bdb070af63197ab1fc 100644 (file)
@@ -34,3 +34,7 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
        SYNC
        MOVV    R1, ret+8(FP)
        RET
+
+// uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
+       JMP     atomic·Load(SB)
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index 55943f6925c258ad6e05ef7f0f2f41d4c07bcbec..1cd6d9a9ce33b5e3b7f347c26693ff349b9684ea 100644 (file)
@@ -119,6 +119,9 @@ func Load(ptr *uint32) uint32
 //go:noescape
 func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
@@ -130,3 +133,9 @@ func Store(ptr *uint32, val uint32)
 
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func CasRel(addr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index 72c98eb0c5f6c6ba360d12d017a9d3f3276cd980..4f1a95c5bd8437e453e1eadc7bb09b96b76dda3b 100644 (file)
@@ -35,6 +35,9 @@ func Load64(ptr *uint64) uint64
 //go:noescape
 func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 
+//go:noescape
+func LoadAcq(ptr *uint32) uint32
+
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
@@ -46,11 +49,17 @@ func Or8(ptr *uint8, val uint8)
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
 //go:noescape
 func Store64(ptr *uint64, val uint64)
 
+//go:noescape
+func StoreRel(ptr *uint32, val uint32)
+
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.s b/src/runtime/internal/atomic/atomic_ppc64x.s
index c9c2d1fc0c6e4dcb3f23d835fc82c663d9e6f685..c079ea494f95607672fef0a9e0325507cd56cec8 100644 (file)
@@ -38,3 +38,12 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
        ISYNC
        MOVD    R3, ret+8(FP)
        RET
+
+// uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
+       MOVD   ptr+0(FP), R3
+       MOVWZ  0(R3), R3
+       CMPW   R3, R3, CR7
+       BC     4, 30, 1(PC) // bne- cr7, 0x4
+       MOVW   R3, ret+8(FP)
+       RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index 9343853485e3352acf9d5c6acdc05e4fea56745f..ec294a27ba0736fa35b93b0939b15a0b3c3365da 100644 (file)
@@ -24,6 +24,12 @@ func Load64(ptr *uint64) uint64 {
        return *ptr
 }
 
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+       return *ptr
+}
+
 //go:noinline
 //go:nosplit
 func Store(ptr *uint32, val uint32) {
@@ -43,6 +49,12 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
        *(*uintptr)(ptr) = uintptr(val)
 }
 
+//go:noinline
+//go:nosplit
+func StoreRel(ptr *uint32, val uint32) {
+       *ptr = val
+}
+
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
@@ -71,3 +83,6 @@ func Xchguintptr(ptr *uintptr, new uintptr) uintptr
 
 //go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func CasRel(ptr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
index cbf254fcb5d40bdd0f34a3380f6d8a10f879b450..71288e9003dce44ff6b4ff220c2001e3388ded91 100644 (file)
@@ -21,6 +21,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
        return *(*unsafe.Pointer)(ptr)
 }
 
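+// Wasm is single-threaded today, so plain loads and stores already give
+// the ordering the new acquire/release entry points promise.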
+//go:nosplit
+//go:noinline
+func LoadAcq(ptr *uint32) uint32 {
+       return *ptr
+}
+
 //go:nosplit
 //go:noinline
 func Load64(ptr *uint64) uint64 {
@@ -105,6 +111,12 @@ func Store(ptr *uint32, val uint32) {
        *ptr = val
 }
 
+//go:nosplit
+//go:noinline
+func StoreRel(ptr *uint32, val uint32) {
+       *ptr = val
+}
+
 //go:nosplit
 //go:noinline
 func Store64(ptr *uint64, val uint64) {
@@ -147,6 +159,16 @@ func Casuintptr(ptr *uintptr, old, new uintptr) bool {
        return false
 }
 
+//go:nosplit
+//go:noinline
+func CasRel(ptr *uint32, old, new uint32) bool {
+       if *ptr == old {
+               *ptr = new
+               return true
+       }
+       return false
+}
+
 //go:nosplit
 //go:noinline
 func Storeuintptr(ptr *uintptr, new uintptr) {
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 86c14997b1df53deb089008bf73accf2e64c1d84..844e0237157414889041ab12058b116defde13f0 100644 (file)
@@ -4765,11 +4765,11 @@ func runqput(_p_ *p, gp *g, next bool) {
        }
 
 retry:
-       h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers
+       h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
        t := _p_.runqtail
        if t-h < uint32(len(_p_.runq)) {
                _p_.runq[t%uint32(len(_p_.runq))].set(gp)
-               atomic.Store(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
+               atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
                return
        }
        if runqputslow(_p_, gp, h, t) {
@@ -4793,7 +4793,7 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
        for i := uint32(0); i < n; i++ {
                batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
        }
-       if !atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+       if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
                return false
        }
        batch[n] = gp
@@ -4837,13 +4837,13 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
        }
 
        for {
-               h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers
+               h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
                t := _p_.runqtail
                if t == h {
                        return nil, false
                }
                gp := _p_.runq[h%uint32(len(_p_.runq))].ptr()
-               if atomic.Cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
+               if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume
                        return gp, false
                }
        }
@@ -4855,8 +4855,8 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
 // Can be executed by any P.
 func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
        for {
-               h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers
-               t := atomic.Load(&_p_.runqtail) // load-acquire, synchronize with the producer
+               h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
+               t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer
                n := t - h
                n = n - n/2
                if n == 0 {
@@ -4899,7 +4899,7 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool
                        g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
                        batch[(batchHead+i)%uint32(len(batch))] = g
                }
-               if atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+               if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
                        return n
                }
        }
@@ -4919,11 +4919,11 @@ func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
        if n == 0 {
                return gp
        }
-       h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers
+       h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
        if t-h+n >= uint32(len(_p_.runq)) {
                throw("runqsteal: runq overflow")
        }
-       atomic.Store(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
+       atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
        return gp
 }