p2.From.Reg = arm64.REGTMP
p2.To.Type = obj.TYPE_BRANCH
gc.Patch(p2, p)
+ case ssa.OpARM64LoweredAtomicExchange64Variant,
+ ssa.OpARM64LoweredAtomicExchange32Variant:
+ swap := arm64.ASWPALD
+ if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
+ swap = arm64.ASWPALW
+ }
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ out := v.Reg0()
+
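+ // With the LSE extension the exchange is a single SWPAL instruction: it
+ // atomically stores Rarg1 to (Rarg0) and returns the old contents in Rout.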
+ // SWPALD Rarg1, (Rarg0), Rout
+ p := s.Prog(swap)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = r0
+ p.RegTo2 = out
+
case ssa.OpARM64LoweredAtomicAdd64,
ssa.OpARM64LoweredAtomicAdd32:
// LDAXR (Rarg0), Rout
p5.To.Type = obj.TYPE_REG
p5.To.Reg = out
gc.Patch(p2, p5)
+ case ssa.OpARM64LoweredAtomicCas64Variant,
+ ssa.OpARM64LoweredAtomicCas32Variant:
+ // Rarg0: ptr
+ // Rarg1: old
+ // Rarg2: new
+ // MOV Rarg1, Rtmp
+ // CASAL Rtmp, (Rarg0), Rarg2
+ // CMP Rarg1, Rtmp
+ // CSET EQ, Rout
+ cas := arm64.ACASALD
+ cmp := arm64.ACMP
+ mov := arm64.AMOVD
+ if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
+ cas = arm64.ACASALW
+ cmp = arm64.ACMPW
+ mov = arm64.AMOVW
+ }
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ r2 := v.Args[2].Reg()
+ out := v.Reg0()
+
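+ // CASAL loads the value it observes in memory back into its first operand,
+ // so the expected value is first copied to Rtmp; comparing Rtmp with Rarg1
+ // afterwards tells whether the swap actually happened.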
+ // MOV Rarg1, Rtmp
+ p := s.Prog(mov)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = arm64.REGTMP
+
+ // CASAL Rtmp, (Rarg0), Rarg2
+ p1 := s.Prog(cas)
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = arm64.REGTMP
+ p1.To.Type = obj.TYPE_MEM
+ p1.To.Reg = r0
+ p1.RegTo2 = r2
+
+ // CMP Rarg1, Rtmp
+ p2 := s.Prog(cmp)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = r1
+ p2.Reg = arm64.REGTMP
+
+ // CSET EQ, Rout
+ p3 := s.Prog(arm64.ACSET)
+ p3.From.Type = obj.TYPE_REG
+ p3.From.Reg = arm64.COND_EQ
+ p3.To.Type = obj.TYPE_REG
+ p3.To.Reg = out
+
case ssa.OpARM64LoweredAtomicAnd8,
ssa.OpARM64LoweredAtomicAnd32,
ssa.OpARM64LoweredAtomicOr8,
p3.From.Reg = arm64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p)
+ case ssa.OpARM64LoweredAtomicAnd8Variant,
+ ssa.OpARM64LoweredAtomicAnd32Variant:
+ atomicClear := arm64.ALDCLRALW
+ if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
+ atomicClear = arm64.ALDCLRALB
+ }
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ out := v.Reg0()
+
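+ // LSE has no atomic AND, only an atomic bit clear (LDCLR), so the mask is
+ // complemented with MVN first; LDCLRAL returns the old contents, and the
+ // trailing AND recomputes the new value for Rout.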
+ // MVN Rarg1, Rtmp
+ p := s.Prog(arm64.AMVN)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = arm64.REGTMP
+
+ // LDCLRALW Rtmp, (Rarg0), Rout
+ p1 := s.Prog(atomicClear)
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = arm64.REGTMP
+ p1.To.Type = obj.TYPE_MEM
+ p1.To.Reg = r0
+ p1.RegTo2 = out
+
+ // AND Rarg1, Rout
+ p2 := s.Prog(arm64.AAND)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = r1
+ p2.To.Type = obj.TYPE_REG
+ p2.To.Reg = out
+
+ case ssa.OpARM64LoweredAtomicOr8Variant,
+ ssa.OpARM64LoweredAtomicOr32Variant:
+ atomicOr := arm64.ALDORALW
+ if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
+ atomicOr = arm64.ALDORALB
+ }
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ out := v.Reg0()
+
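+ // LDORAL atomically ORs Rarg1 into (Rarg0) but returns the old contents in
+ // Rout, so the trailing ORR recomputes the new value the op must return.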
+ // LDORALW Rarg1, (Rarg0), Rout
+ p := s.Prog(atomicOr)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = r0
+ p.RegTo2 = out
+
+ // ORR Rarg1, Rout
+ p2 := s.Prog(arm64.AORR)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = r1
+ p2.To.Type = obj.TYPE_REG
+ p2.To.Reg = out
+
case ssa.OpARM64MOVBreg,
ssa.OpARM64MOVBUreg,
ssa.OpARM64MOVHreg,
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Xchg64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
},
- sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-
- addF("runtime/internal/atomic", "Xadd",
- func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
- v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[TUINT32], types.TypeMem), args[0], args[1], s.mem())
- s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
- return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
- },
- sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
- addF("runtime/internal/atomic", "Xadd64",
- func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
- v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
- s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
- return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
- },
sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
- makeXaddARM64 := func(op0 ssa.Op, op1 ssa.Op, ty types.EType) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ type atomicOpEmitter func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType)
+
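+ // makeAtomicGuardedIntrinsicARM64 builds an intrinsic guarded by a run-time
+ // check of arm64HasATOMICS: op1 (the LSE form) is emitted on the likely path
+ // and op0 (the original LL/SC sequence) on the fallback path. typ is the
+ // operand type handed to the emitter; rtyp is the result type, or TNIL for
+ // intrinsics that return nothing.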
+ makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.EType, emit atomicOpEmitter) intrinsicBuilder {
+
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
// Target Atomic feature is identified by dynamic detection
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
bEnd := s.f.NewBlock(ssa.BlockPlain)
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
- b.Likely = ssa.BranchUnlikely // most machines don't have Atomics nowadays
+ b.Likely = ssa.BranchLikely
// We have atomic instructions - use it directly.
s.startBlock(bTrue)
- v0 := s.newValue3(op1, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
- s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v0)
- s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v0)
+ emit(s, n, args, op1, typ)
s.endBlock().AddEdgeTo(bEnd)
// Use original instruction sequence.
s.startBlock(bFalse)
- v1 := s.newValue3(op0, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
- s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v1)
- s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v1)
+ emit(s, n, args, op0, typ)
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
- return s.variable(n, types.Types[ty])
+ if rtyp == TNIL {
+ return nil
+ }
+ return s.variable(n, types.Types[rtyp])
}
}
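+ // atomicXchgXaddEmitterARM64 emits the body shared by the Xchg and Xadd
+ // intrinsics: both take (ptr, val, mem) and yield (value, mem), so only the
+ // ssa.Op differs.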
+ atomicXchgXaddEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+ v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
+ s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+ s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+ }
+ addF("runtime/internal/atomic", "Xchg",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
+ sys.ARM64)
+ addF("runtime/internal/atomic", "Xchg64",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
+ sys.ARM64)
+
addF("runtime/internal/atomic", "Xadd",
- makeXaddARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32),
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[TUINT32], types.TypeMem), args[0], args[1], s.mem())
+ s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+ return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
+ },
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+ addF("runtime/internal/atomic", "Xadd64",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
+ s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+ return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
+ },
+ sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+ addF("runtime/internal/atomic", "Xadd",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
sys.ARM64)
addF("runtime/internal/atomic", "Xadd64",
- makeXaddARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64),
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
sys.ARM64)
addF("runtime/internal/atomic", "Cas",
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Cas64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
},
- sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "CasRel",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
},
sys.PPC64)
+ atomicCasEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+ v := s.newValue4(op, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
+ s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+ s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+ }
+
+ addF("runtime/internal/atomic", "Cas",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, TUINT32, TBOOL, atomicCasEmitterARM64),
+ sys.ARM64)
+ addF("runtime/internal/atomic", "Cas64",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, TUINT64, TBOOL, atomicCasEmitterARM64),
+ sys.ARM64)
+
addF("runtime/internal/atomic", "And8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "And",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
+
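+ // atomicAndOrEmitterARM64 only updates memory; And8/And/Or8/Or return no
+ // value, so they are registered below with TNIL result types and the guarded
+ // builder returns nil for them.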
+ atomicAndOrEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+ s.vars[&memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
+ }
+
+ addF("runtime/internal/atomic", "And8",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+ sys.ARM64)
+ addF("runtime/internal/atomic", "And",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+ sys.ARM64)
+ addF("runtime/internal/atomic", "Or8",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+ sys.ARM64)
+ addF("runtime/internal/atomic", "Or",
+ makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+ sys.ARM64)
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
(AtomicStore64 ...) => (STLR ...)
(AtomicStorePtrNoWB ...) => (STLR ...)
-(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
-(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
+(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
+(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
+(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...)
+(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
+
// Currently the updated value is not used, but we need a register to temporarily hold it.
(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
-(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicAnd8Variant ptr val mem) => (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+(AtomicOr8Variant ptr val mem) => (Select1 (LoweredAtomicOr8Variant ptr val mem))
+(AtomicOr32Variant ptr val mem) => (Select1 (LoweredAtomicOr32Variant ptr val mem))
// Write barrier.
(WB ...) => (LoweredWB ...)
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ // atomic exchange variant.
+ // store arg1 to *arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
+ // SWPALD Rarg1, (Rarg0), Rout
+ {name: "LoweredAtomicExchange64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicExchange32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+
// atomic add.
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
// LDAXR (Rarg0), Rout
{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ // atomic compare and swap variant.
+ // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
+ // if *arg0 == arg1 {
+ // *arg0 = arg2
+ // return (true, memory)
+ // } else {
+ // return (false, memory)
+ // }
+ // MOV Rarg1, Rtmp
+ // CASAL Rtmp, (Rarg0), Rarg2
+ // CMP Rarg1, Rtmp
+ // CSET EQ, Rout
+ {name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
// LDAXR (Rarg0), Rout
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ // atomic and/or variant.
+ // *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
+ // AND:
+ // MVN Rarg1, Rtmp
+ // LDCLRALB Rtmp, (Rarg0), Rout
+ // AND Rarg1, Rout
+ // OR:
+ // LDORALB Rarg1, (Rarg0), Rout
+ // ORR Rarg1, Rout
+ {name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary,
// but clobbers R30 (LR) because it's a call.
// These variants have the same semantics as above atomic operations.
// But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
// Currently, they are used on ARM64 only.
- {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
- {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+ {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+ {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+ {name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+ {name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+ {name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+ {name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+ {name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+ {name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+ {name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
+ {name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
// Clobber experiment op
{name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
OpARM64STLRW
OpARM64LoweredAtomicExchange64
OpARM64LoweredAtomicExchange32
+ OpARM64LoweredAtomicExchange64Variant
+ OpARM64LoweredAtomicExchange32Variant
OpARM64LoweredAtomicAdd64
OpARM64LoweredAtomicAdd32
OpARM64LoweredAtomicAdd64Variant
OpARM64LoweredAtomicAdd32Variant
OpARM64LoweredAtomicCas64
OpARM64LoweredAtomicCas32
+ OpARM64LoweredAtomicCas64Variant
+ OpARM64LoweredAtomicCas32Variant
OpARM64LoweredAtomicAnd8
OpARM64LoweredAtomicAnd32
OpARM64LoweredAtomicOr8
OpARM64LoweredAtomicOr32
+ OpARM64LoweredAtomicAnd8Variant
+ OpARM64LoweredAtomicAnd32Variant
+ OpARM64LoweredAtomicOr8Variant
+ OpARM64LoweredAtomicOr32Variant
OpARM64LoweredWB
OpARM64LoweredPanicBoundsA
OpARM64LoweredPanicBoundsB
OpAtomicOr32
OpAtomicAdd32Variant
OpAtomicAdd64Variant
+ OpAtomicExchange32Variant
+ OpAtomicExchange64Variant
+ OpAtomicCompareAndSwap32Variant
+ OpAtomicCompareAndSwap64Variant
+ OpAtomicAnd8Variant
+ OpAtomicAnd32Variant
+ OpAtomicOr8Variant
+ OpAtomicOr32Variant
OpClobber
)
},
},
},
+ {
+ name: "LoweredAtomicExchange64Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicExchange32Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
{
name: "LoweredAtomicAdd64",
argLen: 3,
},
},
},
+ {
+ name: "LoweredAtomicCas64Variant",
+ argLen: 4,
+ resultNotInArgs: true,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicCas32Variant",
+ argLen: 4,
+ resultNotInArgs: true,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
{
name: "LoweredAtomicAnd8",
argLen: 3,
},
},
},
+ {
+ name: "LoweredAtomicAnd8Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicAnd32Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicOr8Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicOr32Variant",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
{
name: "LoweredWB",
auxType: auxSym,
hasSideEffects: true,
generic: true,
},
+ {
+ name: "AtomicExchange32Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicExchange64Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicCompareAndSwap32Variant",
+ argLen: 4,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicCompareAndSwap64Variant",
+ argLen: 4,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicAnd8Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicAnd32Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicOr8Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
+ {
+ name: "AtomicOr32Variant",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
{
name: "Clobber",
auxType: auxSymOff,
return true
case OpAtomicAnd32:
return rewriteValueARM64_OpAtomicAnd32(v)
+ case OpAtomicAnd32Variant:
+ return rewriteValueARM64_OpAtomicAnd32Variant(v)
case OpAtomicAnd8:
return rewriteValueARM64_OpAtomicAnd8(v)
+ case OpAtomicAnd8Variant:
+ return rewriteValueARM64_OpAtomicAnd8Variant(v)
case OpAtomicCompareAndSwap32:
v.Op = OpARM64LoweredAtomicCas32
return true
+ case OpAtomicCompareAndSwap32Variant:
+ v.Op = OpARM64LoweredAtomicCas32Variant
+ return true
case OpAtomicCompareAndSwap64:
v.Op = OpARM64LoweredAtomicCas64
return true
+ case OpAtomicCompareAndSwap64Variant:
+ v.Op = OpARM64LoweredAtomicCas64Variant
+ return true
case OpAtomicExchange32:
v.Op = OpARM64LoweredAtomicExchange32
return true
+ case OpAtomicExchange32Variant:
+ v.Op = OpARM64LoweredAtomicExchange32Variant
+ return true
case OpAtomicExchange64:
v.Op = OpARM64LoweredAtomicExchange64
return true
+ case OpAtomicExchange64Variant:
+ v.Op = OpARM64LoweredAtomicExchange64Variant
+ return true
case OpAtomicLoad32:
v.Op = OpARM64LDARW
return true
return true
case OpAtomicOr32:
return rewriteValueARM64_OpAtomicOr32(v)
+ case OpAtomicOr32Variant:
+ return rewriteValueARM64_OpAtomicOr32Variant(v)
case OpAtomicOr8:
return rewriteValueARM64_OpAtomicOr8(v)
+ case OpAtomicOr8Variant:
+ return rewriteValueARM64_OpAtomicOr8Variant(v)
case OpAtomicStore32:
v.Op = OpARM64STLRW
return true
return true
}
}
+func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd32Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
+func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd8Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
+func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr32Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
+func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr8Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAvg64u(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
}
})
}
+
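+// The benchmarks below exercise the Cas and Xchg intrinsics; on ARM64 the
+// generated code now selects between the LSE and LL/SC sequences at run time.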
+func BenchmarkCas(b *testing.B) {
+ x := uint32(1)
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Cas(ptr, 1, 0)
+ atomic.Cas(ptr, 0, 1)
+ }
+ })
+}
+
+func BenchmarkCas64(b *testing.B) {
+ x := uint64(1)
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ atomic.Cas64(ptr, 1, 0)
+ atomic.Cas64(ptr, 0, 1)
+ }
+ })
+}
+
+func BenchmarkXchg(b *testing.B) {
+ x := uint32(1)
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ y := uint32(1)
+ for pb.Next() {
+ y = atomic.Xchg(ptr, y)
+ y++
+ }
+ })
+}
+
+func BenchmarkXchg64(b *testing.B) {
+ x := uint64(1)
+ ptr := &x
+ b.RunParallel(func(pb *testing.PB) {
+ y := uint64(1)
+ for pb.Next() {
+ y = atomic.Xchg64(ptr, y)
+ y++
+ }
+ })
+}