Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: improve atomic swap intrinsics on arm64
authorJonathan Swinney <jswinney@amazon.com>
Wed, 4 Nov 2020 16:18:23 +0000 (16:18 +0000)
committerCherry Zhang <cherryyz@google.com>
Thu, 5 Nov 2020 23:21:33 +0000 (23:21 +0000)
ARMv8.1 has added new instructions for atomic memory operations. This
change builds on the previous change which added support for atomic add,
0a7ac93c27c9ade79fe0f66ae0bb81484c241ae5, to include similar support for
atomic-compare-and-swap, atomic-swap, atomic-or, and atomic-and
intrinsics. Since the new instructions are not guaranteed to be present,
we guard their usages with a branch on a CPU feature.

Performance on an ARMv8.1 machine:
name                 old time/op  new time/op  delta
CompareAndSwap-16    37.9ns ±16%  24.1ns ± 4%  -36.44%  (p=0.000 n=10+9)
CompareAndSwap64-16  38.6ns ±15%  24.1ns ± 3%  -37.47%  (p=0.000 n=10+10)

name       old time/op  new time/op  delta
Swap-16    46.9ns ±32%  12.5ns ± 6%  -73.40%  (p=0.000 n=10+10)
Swap64-16  53.4ns ± 1%  12.5ns ± 6%  -76.56%  (p=0.000 n=10+10)

name            old time/op  new time/op  delta
Or8-16          8.81ns ± 0%  5.61ns ± 0%  -36.32%  (p=0.000 n=10+10)
Or-16           7.21ns ± 0%  5.61ns ± 0%  -22.19%  (p=0.000 n=10+10)
Or8Parallel-16  59.8ns ± 3%  12.5ns ± 2%  -79.10%  (p=0.000 n=10+10)
OrParallel-16   51.7ns ± 3%  12.5ns ± 2%  -75.84%  (p=0.000 n=10+10)

name             old time/op  new time/op  delta
And8-16          8.81ns ± 0%  5.61ns ± 0%  -36.32%  (p=0.000 n=10+10)
And-16           7.21ns ± 0%  5.61ns ± 0%  -22.19%  (p=0.000 n=10+10)
And8Parallel-16  59.1ns ± 6%  12.8ns ± 3%  -78.33%  (p=0.000 n=10+10)
AndParallel-16   51.4ns ± 7%  12.8ns ± 3%  -75.03%  (p=0.000 n=10+10)

Performance on an ARMv8.0 machine (no atomics instructions):
name                 old time/op  new time/op  delta
CompareAndSwap-16    61.3ns ± 0%  62.4ns ± 0%  +1.70%  (p=0.000 n=8+9)
CompareAndSwap64-16  62.0ns ± 3%  61.3ns ± 2%    ~     (p=0.093 n=10+10)

name       old time/op  new time/op  delta
Swap-16     127ns ± 2%   131ns ± 2%  +2.91%  (p=0.001 n=10+10)
Swap64-16   128ns ± 1%   131ns ± 2%  +2.43%  (p=0.001 n=10+10)

name            old time/op  new time/op  delta
Or8-16          14.9ns ± 0%  15.3ns ± 0%  +2.68%  (p=0.000 n=10+10)
Or-16           11.8ns ± 0%  12.3ns ± 0%  +4.24%  (p=0.000 n=10+10)
Or8Parallel-16   137ns ± 1%   144ns ± 1%  +4.97%  (p=0.000 n=10+10)
OrParallel-16    128ns ± 1%   136ns ± 1%  +6.34%  (p=0.000 n=10+10)

name             old time/op  new time/op  delta
And8-16          14.9ns ± 0%  15.3ns ± 0%  +2.68%  (p=0.000 n=10+10)
And-16           11.8ns ± 0%  12.3ns ± 0%  +4.24%  (p=0.000 n=10+10)
And8Parallel-16   134ns ± 2%   141ns ± 1%  +5.29%  (p=0.000 n=10+10)
AndParallel-16    125ns ± 2%   134ns ± 1%  +7.10%  (p=0.000 n=10+10)

Fixes #39304

Change-Id: Idaca68701d4751650be6b4bedca3d57f51571712
Reviewed-on: https://go-review.googlesource.com/c/go/+/234217
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: fannie zhang <Fannie.Zhang@arm.com>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/runtime/internal/atomic/bench_test.go

index 5c695ef84cbb5c381bfd1f4ce98fd8559e5bed54..22b28a9308bd39270376e491a2c100a3ea2cb86c 100644 (file)
@@ -581,6 +581,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p2.From.Reg = arm64.REGTMP
                p2.To.Type = obj.TYPE_BRANCH
                gc.Patch(p2, p)
+       case ssa.OpARM64LoweredAtomicExchange64Variant,
+               ssa.OpARM64LoweredAtomicExchange32Variant:
+               swap := arm64.ASWPALD
+               if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
+                       swap = arm64.ASWPALW
+               }
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               out := v.Reg0()
+
+               // SWPALD       Rarg1, (Rarg0), Rout
+               p := s.Prog(swap)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = r0
+               p.RegTo2 = out
+
        case ssa.OpARM64LoweredAtomicAdd64,
                ssa.OpARM64LoweredAtomicAdd32:
                // LDAXR        (Rarg0), Rout
@@ -687,6 +705,56 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p5.To.Type = obj.TYPE_REG
                p5.To.Reg = out
                gc.Patch(p2, p5)
+       case ssa.OpARM64LoweredAtomicCas64Variant,
+               ssa.OpARM64LoweredAtomicCas32Variant:
+               // Rarg0: ptr
+               // Rarg1: old
+               // Rarg2: new
+               // MOV          Rarg1, Rtmp
+               // CASAL        Rtmp, (Rarg0), Rarg2
+               // CMP          Rarg1, Rtmp
+               // CSET         EQ, Rout
+               cas := arm64.ACASALD
+               cmp := arm64.ACMP
+               mov := arm64.AMOVD
+               if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
+                       cas = arm64.ACASALW
+                       cmp = arm64.ACMPW
+                       mov = arm64.AMOVW
+               }
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               r2 := v.Args[2].Reg()
+               out := v.Reg0()
+
+               // MOV          Rarg1, Rtmp
+               p := s.Prog(mov)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = arm64.REGTMP
+
+               // CASAL        Rtmp, (Rarg0), Rarg2
+               p1 := s.Prog(cas)
+               p1.From.Type = obj.TYPE_REG
+               p1.From.Reg = arm64.REGTMP
+               p1.To.Type = obj.TYPE_MEM
+               p1.To.Reg = r0
+               p1.RegTo2 = r2
+
+               // CMP          Rarg1, Rtmp
+               p2 := s.Prog(cmp)
+               p2.From.Type = obj.TYPE_REG
+               p2.From.Reg = r1
+               p2.Reg = arm64.REGTMP
+
+               // CSET         EQ, Rout
+               p3 := s.Prog(arm64.ACSET)
+               p3.From.Type = obj.TYPE_REG
+               p3.From.Reg = arm64.COND_EQ
+               p3.To.Type = obj.TYPE_REG
+               p3.To.Reg = out
+
        case ssa.OpARM64LoweredAtomicAnd8,
                ssa.OpARM64LoweredAtomicAnd32,
                ssa.OpARM64LoweredAtomicOr8,
@@ -725,6 +793,63 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p3.From.Reg = arm64.REGTMP
                p3.To.Type = obj.TYPE_BRANCH
                gc.Patch(p3, p)
+       case ssa.OpARM64LoweredAtomicAnd8Variant,
+               ssa.OpARM64LoweredAtomicAnd32Variant:
+               atomic_clear := arm64.ALDCLRALW
+               if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
+                       atomic_clear = arm64.ALDCLRALB
+               }
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               out := v.Reg0()
+
+               // MNV       Rarg1 Rtemp
+               p := s.Prog(arm64.AMVN)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = arm64.REGTMP
+
+               // LDCLRALW  Rtemp, (Rarg0), Rout
+               p1 := s.Prog(atomic_clear)
+               p1.From.Type = obj.TYPE_REG
+               p1.From.Reg = arm64.REGTMP
+               p1.To.Type = obj.TYPE_MEM
+               p1.To.Reg = r0
+               p1.RegTo2 = out
+
+               // AND       Rarg1, Rout
+               p2 := s.Prog(arm64.AAND)
+               p2.From.Type = obj.TYPE_REG
+               p2.From.Reg = r1
+               p2.To.Type = obj.TYPE_REG
+               p2.To.Reg = out
+
+       case ssa.OpARM64LoweredAtomicOr8Variant,
+               ssa.OpARM64LoweredAtomicOr32Variant:
+               atomic_or := arm64.ALDORALW
+               if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
+                       atomic_or = arm64.ALDORALB
+               }
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               out := v.Reg0()
+
+               // LDORALW  Rarg1, (Rarg0), Rout
+               p := s.Prog(atomic_or)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = r0
+               p.RegTo2 = out
+
+               // ORR       Rarg1, Rout
+               p2 := s.Prog(arm64.AORR)
+               p2.From.Type = obj.TYPE_REG
+               p2.From.Reg = r1
+               p2.To.Type = obj.TYPE_REG
+               p2.To.Reg = out
+
        case ssa.OpARM64MOVBreg,
                ssa.OpARM64MOVBUreg,
                ssa.OpARM64MOVHreg,
index 67484904a953e95923c203faf646e7a0458e1dfc..0b38e70cd2a95562f7bf6f1a6ab1b5356d610fb0 100644 (file)
@@ -3458,31 +3458,19 @@ func init() {
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
                },
-               sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+               sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
        addF("runtime/internal/atomic", "Xchg64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
                },
-               sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-
-       addF("runtime/internal/atomic", "Xadd",
-               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-                       v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[TUINT32], types.TypeMem), args[0], args[1], s.mem())
-                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-                       return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
-               },
-               sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-       addF("runtime/internal/atomic", "Xadd64",
-               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-                       v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
-                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-                       return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
-               },
                sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 
-       makeXaddARM64 := func(op0 ssa.Op, op1 ssa.Op, ty types.EType) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+       type atomicOpEmitter func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType)
+
+       makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.EType, emit atomicOpEmitter) intrinsicBuilder {
+
                return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        // Target Atomic feature is identified by dynamic detection
                        addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
@@ -3495,33 +3483,60 @@ func init() {
                        bEnd := s.f.NewBlock(ssa.BlockPlain)
                        b.AddEdgeTo(bTrue)
                        b.AddEdgeTo(bFalse)
-                       b.Likely = ssa.BranchUnlikely // most machines don't have Atomics nowadays
+                       b.Likely = ssa.BranchLikely
 
                        // We have atomic instructions - use it directly.
                        s.startBlock(bTrue)
-                       v0 := s.newValue3(op1, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v0)
-                       s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v0)
+                       emit(s, n, args, op1, typ)
                        s.endBlock().AddEdgeTo(bEnd)
 
                        // Use original instruction sequence.
                        s.startBlock(bFalse)
-                       v1 := s.newValue3(op0, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v1)
-                       s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v1)
+                       emit(s, n, args, op0, typ)
                        s.endBlock().AddEdgeTo(bEnd)
 
                        // Merge results.
                        s.startBlock(bEnd)
-                       return s.variable(n, types.Types[ty])
+                       if rtyp == TNIL {
+                               return nil
+                       } else {
+                               return s.variable(n, types.Types[rtyp])
+                       }
                }
        }
 
+       atomicXchgXaddEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+               v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+               s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+       }
+       addF("runtime/internal/atomic", "Xchg",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "Xchg64",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
+               sys.ARM64)
+
        addF("runtime/internal/atomic", "Xadd",
-               makeXaddARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32),
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[TUINT32], types.TypeMem), args[0], args[1], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+                       return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
+               },
+               sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+       addF("runtime/internal/atomic", "Xadd64",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+                       return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
+               },
+               sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+       addF("runtime/internal/atomic", "Xadd",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
                sys.ARM64)
        addF("runtime/internal/atomic", "Xadd64",
-               makeXaddARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64),
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
                sys.ARM64)
 
        addF("runtime/internal/atomic", "Cas",
@@ -3530,14 +3545,14 @@ func init() {
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
                },
-               sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+               sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
        addF("runtime/internal/atomic", "Cas64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
                },
-               sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+               sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
        addF("runtime/internal/atomic", "CasRel",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
@@ -3546,18 +3561,31 @@ func init() {
                },
                sys.PPC64)
 
+       atomicCasEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+               v := s.newValue4(op, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+               s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+       }
+
+       addF("runtime/internal/atomic", "Cas",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, TUINT32, TBOOL, atomicCasEmitterARM64),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "Cas64",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, TUINT64, TBOOL, atomicCasEmitterARM64),
+               sys.ARM64)
+
        addF("runtime/internal/atomic", "And8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
                        return nil
                },
-               sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+               sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
        addF("runtime/internal/atomic", "And",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
                        return nil
                },
-               sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+               sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
        addF("runtime/internal/atomic", "Or8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3569,7 +3597,24 @@ func init() {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
                        return nil
                },
-               sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+               sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
+
+       atomicAndOrEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+               s.vars[&memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
+       }
+
+       addF("runtime/internal/atomic", "And8",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "And",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "Or8",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "Or",
+               makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+               sys.ARM64)
 
        alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
        alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
index 7e014fe0a8da7587d4150a0d54c3ce0951501f17..9edc0c94b00e7f8f717a9335ea9d6feb107f9a91 100644 (file)
 (AtomicStore64      ...) => (STLR  ...)
 (AtomicStorePtrNoWB ...) => (STLR  ...)
 
-(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
-(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
+(AtomicExchange(32|64)       ...) => (LoweredAtomicExchange(32|64) ...)
+(AtomicAdd(32|64)            ...) => (LoweredAtomicAdd(32|64) ...)
 (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
 
+(AtomicAdd(32|64)Variant            ...) => (LoweredAtomicAdd(32|64)Variant      ...)
+(AtomicExchange(32|64)Variant       ...) => (LoweredAtomicExchange(32|64)Variant ...)
+(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant      ...)
+
 // Currently the updated value is not used, but we need a register to temporarily hold it.
 (AtomicAnd8  ptr val mem) => (Select1 (LoweredAtomicAnd8  ptr val mem))
 (AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
 (AtomicOr8   ptr val mem) => (Select1 (LoweredAtomicOr8   ptr val mem))
 (AtomicOr32  ptr val mem) => (Select1 (LoweredAtomicOr32  ptr val mem))
 
-(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicAnd8Variant  ptr val mem) => (Select1 (LoweredAtomicAnd8Variant  ptr val mem))
+(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+(AtomicOr8Variant   ptr val mem) => (Select1 (LoweredAtomicOr8Variant   ptr val mem))
+(AtomicOr32Variant  ptr val mem) => (Select1 (LoweredAtomicOr32Variant  ptr val mem))
 
 // Write barrier.
 (WB ...) => (LoweredWB ...)
index fe9edbf9333b92b771b288251589b2c60fcc851d..87db2b7c9d906a950746080150bfaf84e67e2de9 100644 (file)
@@ -621,6 +621,12 @@ func init() {
                {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
                {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+               // atomic exchange variant.
+               // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
+               // SWPALD       Rarg1, (Rarg0), Rout
+               {name: "LoweredAtomicExchange64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicExchange32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+
                // atomic add.
                // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
                // LDAXR        (Rarg0), Rout
@@ -654,6 +660,21 @@ func init() {
                {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
                {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+               // atomic compare and swap variant.
+               // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
+               // if *arg0 == arg1 {
+               //   *arg0 = arg2
+               //   return (true, memory)
+               // } else {
+               //   return (false, memory)
+               // }
+               // MOV          Rarg1, Rtmp
+               // CASAL        Rtmp, (Rarg0), Rarg2
+               // CMP          Rarg1, Rtmp
+               // CSET         EQ, Rout
+               {name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+               {name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
                // atomic and/or.
                // *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
                // LDAXR        (Rarg0), Rout
@@ -665,6 +686,20 @@ func init() {
                {name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
                {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+               // atomic and/or variant.
+               // *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
+               //   AND:
+               // MNV       Rarg1, Rtemp
+               // LDANDALB  Rtemp, (Rarg0), Rout
+               // AND       Rarg1, Rout
+               //   OR:
+               // LDORALB  Rarg1, (Rarg0), Rout
+               // ORR       Rarg1, Rout
+               {name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+               {name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+               {name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+
                // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
                // It saves all GP registers if necessary,
                // but clobbers R30 (LR) because it's a call.
index db8d7ba0cf009ee3825d0d39afe7a95ce3d31cea..9565199d516d57def2666c595be00f291d370e57 100644 (file)
@@ -574,8 +574,16 @@ var genericOps = []opData{
        // These variants have the same semantics as above atomic operations.
        // But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
        // Currently, they are used on ARM64 only.
-       {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
-       {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},          // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},          // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},     // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
+       {name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},     // Store arg1 to *arg0.  arg2=memory.  Returns old contents of *arg0 and new memory.
+       {name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2.  Returns true if store happens and new memory.
+       {name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2.  Returns true if store happens and new memory.
+       {name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1.  arg2=memory.  Returns memory.
+       {name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1.  arg2=memory.  Returns memory.
+       {name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1.  arg2=memory.  Returns memory.
+       {name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1.  arg2=memory.  Returns memory.
 
        // Clobber experiment op
        {name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
index 25c1df12eed956a8f3a8e17610464357f1c94c46..c0b663cd8f62056ec6e0288cabfa20af5680df7b 100644 (file)
@@ -1581,16 +1581,24 @@ const (
        OpARM64STLRW
        OpARM64LoweredAtomicExchange64
        OpARM64LoweredAtomicExchange32
+       OpARM64LoweredAtomicExchange64Variant
+       OpARM64LoweredAtomicExchange32Variant
        OpARM64LoweredAtomicAdd64
        OpARM64LoweredAtomicAdd32
        OpARM64LoweredAtomicAdd64Variant
        OpARM64LoweredAtomicAdd32Variant
        OpARM64LoweredAtomicCas64
        OpARM64LoweredAtomicCas32
+       OpARM64LoweredAtomicCas64Variant
+       OpARM64LoweredAtomicCas32Variant
        OpARM64LoweredAtomicAnd8
        OpARM64LoweredAtomicAnd32
        OpARM64LoweredAtomicOr8
        OpARM64LoweredAtomicOr32
+       OpARM64LoweredAtomicAnd8Variant
+       OpARM64LoweredAtomicAnd32Variant
+       OpARM64LoweredAtomicOr8Variant
+       OpARM64LoweredAtomicOr32Variant
        OpARM64LoweredWB
        OpARM64LoweredPanicBoundsA
        OpARM64LoweredPanicBoundsB
@@ -2881,6 +2889,14 @@ const (
        OpAtomicOr32
        OpAtomicAdd32Variant
        OpAtomicAdd64Variant
+       OpAtomicExchange32Variant
+       OpAtomicExchange64Variant
+       OpAtomicCompareAndSwap32Variant
+       OpAtomicCompareAndSwap64Variant
+       OpAtomicAnd8Variant
+       OpAtomicAnd32Variant
+       OpAtomicOr8Variant
+       OpAtomicOr32Variant
        OpClobber
 )
 
@@ -20994,6 +21010,38 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:            "LoweredAtomicExchange64Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicExchange32Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:            "LoweredAtomicAdd64",
                argLen:          3,
@@ -21098,6 +21146,44 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:            "LoweredAtomicCas64Variant",
+               argLen:          4,
+               resultNotInArgs: true,
+               clobberFlags:    true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               unsafePoint:     true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicCas32Variant",
+               argLen:          4,
+               resultNotInArgs: true,
+               clobberFlags:    true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               unsafePoint:     true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:            "LoweredAtomicAnd8",
                argLen:          3,
@@ -21170,6 +21256,72 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:            "LoweredAtomicAnd8Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               unsafePoint:     true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicAnd32Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               unsafePoint:     true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicOr8Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicOr32Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:         "LoweredWB",
                auxType:      auxSym,
@@ -35874,6 +36026,54 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:           "AtomicExchange32Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicExchange64Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicCompareAndSwap32Variant",
+               argLen:         4,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicCompareAndSwap64Variant",
+               argLen:         4,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicAnd8Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicAnd32Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicOr8Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicOr32Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
        {
                name:      "Clobber",
                auxType:   auxSymOff,
index 9a5e976deaca045ae98658c13266590f8951a1a9..353696bf39335ee0030e821fb6a63764affd7abb 100644 (file)
@@ -426,20 +426,36 @@ func rewriteValueARM64(v *Value) bool {
                return true
        case OpAtomicAnd32:
                return rewriteValueARM64_OpAtomicAnd32(v)
+       case OpAtomicAnd32Variant:
+               return rewriteValueARM64_OpAtomicAnd32Variant(v)
        case OpAtomicAnd8:
                return rewriteValueARM64_OpAtomicAnd8(v)
+       case OpAtomicAnd8Variant:
+               return rewriteValueARM64_OpAtomicAnd8Variant(v)
        case OpAtomicCompareAndSwap32:
                v.Op = OpARM64LoweredAtomicCas32
                return true
+       case OpAtomicCompareAndSwap32Variant:
+               v.Op = OpARM64LoweredAtomicCas32Variant
+               return true
        case OpAtomicCompareAndSwap64:
                v.Op = OpARM64LoweredAtomicCas64
                return true
+       case OpAtomicCompareAndSwap64Variant:
+               v.Op = OpARM64LoweredAtomicCas64Variant
+               return true
        case OpAtomicExchange32:
                v.Op = OpARM64LoweredAtomicExchange32
                return true
+       case OpAtomicExchange32Variant:
+               v.Op = OpARM64LoweredAtomicExchange32Variant
+               return true
        case OpAtomicExchange64:
                v.Op = OpARM64LoweredAtomicExchange64
                return true
+       case OpAtomicExchange64Variant:
+               v.Op = OpARM64LoweredAtomicExchange64Variant
+               return true
        case OpAtomicLoad32:
                v.Op = OpARM64LDARW
                return true
@@ -454,8 +470,12 @@ func rewriteValueARM64(v *Value) bool {
                return true
        case OpAtomicOr32:
                return rewriteValueARM64_OpAtomicOr32(v)
+       case OpAtomicOr32Variant:
+               return rewriteValueARM64_OpAtomicOr32Variant(v)
        case OpAtomicOr8:
                return rewriteValueARM64_OpAtomicOr8(v)
+       case OpAtomicOr8Variant:
+               return rewriteValueARM64_OpAtomicOr8Variant(v)
        case OpAtomicStore32:
                v.Op = OpARM64STLRW
                return true
@@ -21363,6 +21383,25 @@ func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (AtomicAnd32Variant ptr val mem)
+       // result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+       for {
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(ptr, val, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -21382,6 +21421,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (AtomicAnd8Variant ptr val mem)
+       // result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+       for {
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+               v0.AddArg3(ptr, val, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -21401,6 +21459,25 @@ func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (AtomicOr32Variant ptr val mem)
+       // result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
+       for {
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(ptr, val, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -21420,6 +21497,25 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (AtomicOr8Variant ptr val mem)
+       // result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
+       for {
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+               v0.AddArg3(ptr, val, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueARM64_OpAvg64u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 434aa6d43404ac95aadf1849762965271fca232e..2476c06c52afbdfb24b702bdd87c9aeeb817a9fb 100644 (file)
@@ -142,3 +142,54 @@ func BenchmarkXadd64(b *testing.B) {
                }
        })
 }
+
+func BenchmarkCas(b *testing.B) {
+       var x uint32
+       x = 1
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               for pb.Next() {
+                       atomic.Cas(ptr, 1, 0)
+                       atomic.Cas(ptr, 0, 1)
+               }
+       })
+}
+
+func BenchmarkCas64(b *testing.B) {
+       var x uint64
+       x = 1
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               for pb.Next() {
+                       atomic.Cas64(ptr, 1, 0)
+                       atomic.Cas64(ptr, 0, 1)
+               }
+       })
+}
+func BenchmarkXchg(b *testing.B) {
+       var x uint32
+       x = 1
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               var y uint32
+               y = 1
+               for pb.Next() {
+                       y = atomic.Xchg(ptr, y)
+                       y += 1
+               }
+       })
+}
+
+func BenchmarkXchg64(b *testing.B) {
+       var x uint64
+       x = 1
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               var y uint64
+               y = 1
+               for pb.Next() {
+                       y = atomic.Xchg64(ptr, y)
+                       y += 1
+               }
+       })
+}