cmd/compile: improve atomic add intrinsics with ARMv8.1 new instruction
author     Wei Xiao <wei.xiao@arm.com>
           Fri, 3 Nov 2017 02:05:28 +0000 (02:05 +0000)
committer  Cherry Zhang <cherryyz@google.com>
           Thu, 21 Jun 2018 14:52:43 +0000 (14:52 +0000)
ARMv8.1 added a new instruction (LDADDAL) for atomic memory operations. This
CL improves the existing atomic add intrinsics with the new instruction. Since
the instruction is only guaranteed to be present from ARMv8.1 onwards, its use
is guarded by a run-time CPU feature check.
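
Roughly, each intrinsified call site now expands to the source-level shape
below. This is a sketch only: ldaddalw, llscAddLoop and the flag variable are
illustrative stand-ins modeled with sync/atomic, not compiler or runtime API;
the real instruction selection happens in the SSA backend changed below.

        package main

        import (
                "fmt"
                "sync/atomic"
        )

        // Stands in for the runtime flag arm64_support_atomics, set once
        // at startup from the CPU feature bits (see the runtime changes
        // at the bottom of this CL).
        var arm64SupportAtomics = true

        // ldaddalw models the architectural effect of LDADDALW: atomically
        // add delta to *p and return the OLD value. The real instruction is
        // emitted directly by the compiler; sync/atomic only approximates it.
        func ldaddalw(p *uint32, delta uint32) uint32 {
                return atomic.AddUint32(p, delta) - delta
        }

        // llscAddLoop models the ARMv8.0 fallback: a load-exclusive/
        // store-exclusive (LDAXRW/STLXRW) retry loop.
        func llscAddLoop(p *uint32, delta uint32) uint32 {
                for {
                        old := atomic.LoadUint32(p)
                        if atomic.CompareAndSwapUint32(p, old, old+delta) {
                                return old + delta
                        }
                }
        }

        // xadd has the shape of the generated code: one boolean load and
        // branch, then one of the two instruction sequences above.
        func xadd(p *uint32, delta uint32) uint32 {
                if arm64SupportAtomics {
                        return ldaddalw(p, delta) + delta // old + delta = new
                }
                return llscAddLoop(p, delta)
        }

        func main() {
                var x uint32
                fmt.Println(xadd(&x, 3), xadd(&x, 4)) // 3 7
        }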

Performance result on ARMv8.1 machine:
name        old time/op  new time/op  delta
Xadd-224    1.05µs ± 6%  0.02µs ± 4%  -98.06%  (p=0.000 n=10+8)
Xadd64-224  1.05µs ± 3%  0.02µs ±13%  -98.10%  (p=0.000 n=9+10)
[Geo mean]  1.05µs       0.02µs       -98.08%

Performance result on ARMv8.0 machine:
name        old time/op  new time/op  delta
Xadd-46      538ns ± 1%   541ns ± 1%  +0.62%  (p=0.000 n=9+9)
Xadd64-46    505ns ± 1%   508ns ± 0%  +0.48%  (p=0.003 n=9+8)
[Geo mean]   521ns        524ns       +0.55%

(The -224 and -46 suffixes are the GOMAXPROCS values of the two test
machines; the ~0.5% slowdown on ARMv8.0 presumably reflects the added
feature-flag load and branch on the fallback path.)

Change-Id: If4b5d8d0e2d6f84fe1492a4f5de0789910ad0ee9
Reviewed-on: https://go-review.googlesource.com/81877
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
16 files changed:
src/cmd/asm/internal/arch/arm64.go
src/cmd/asm/internal/asm/testdata/arm64.s
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/go.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/internal/obj/arm64/a.out.go
src/cmd/internal/obj/arm64/anames.go
src/cmd/internal/obj/arm64/asm7.go
src/runtime/internal/atomic/bench_test.go
src/runtime/proc.go
src/runtime/runtime2.go

src/cmd/asm/internal/arch/arm64.go
index e7ef928fa21b5963cb2a5fac4b8b9a55ee4ea678..475d7da5f9363c01c55c6dac8fab8f4e0f4b9d19 100644
@@ -77,7 +77,8 @@ func IsARM64STLXR(op obj.As) bool {
                arm64.ALDADDB, arm64.ALDADDH, arm64.ALDADDW, arm64.ALDADDD,
                arm64.ALDANDB, arm64.ALDANDH, arm64.ALDANDW, arm64.ALDANDD,
                arm64.ALDEORB, arm64.ALDEORH, arm64.ALDEORW, arm64.ALDEORD,
-               arm64.ALDORB, arm64.ALDORH, arm64.ALDORW, arm64.ALDORD:
+               arm64.ALDORB, arm64.ALDORH, arm64.ALDORW, arm64.ALDORD,
+               arm64.ALDADDALD, arm64.ALDADDALW:
                return true
        }
        return false
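
IsARM64STLXR reports the mnemonics that use the Rs, (Rb), Rt operand form;
listing the two new opcodes here is what lets the assembler front end parse
LDADDALD/LDADDALW statements (such as the testdata lines below) and route the
last operand into RegTo2.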
src/cmd/asm/internal/asm/testdata/arm64.s
index 54be761c543ce820a409fdff2706e975f1b2b092..859f71a26b579ce4695a7926136a49262a18b580 100644
@@ -604,6 +604,8 @@ again:
        LDORH   R5, (RSP), R7                        // e7332578
        LDORB   R5, (R6), R7                         // c7302538
        LDORB   R5, (RSP), R7                        // e7332538
+       LDADDALD        R2, (R1), R3                 // 2300e2f8
+       LDADDALW        R5, (R4), R6                 // 8600e5b8
 
 // RET
 //
src/cmd/compile/internal/arm64/ssa.go
index 501eafe03f20c9c54dbf5ea14a0bb219404c1db6..c396ba06d1a5a63e46bc8f41fa3082a560e92b65 100644
@@ -553,6 +553,28 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p3.From.Reg = arm64.REGTMP
                p3.To.Type = obj.TYPE_BRANCH
                gc.Patch(p3, p)
+       case ssa.OpARM64LoweredAtomicAdd64Variant,
+               ssa.OpARM64LoweredAtomicAdd32Variant:
+               // LDADDAL      Rarg1, (Rarg0), Rout
+               // ADD          Rarg1, Rout
+               op := arm64.ALDADDALD
+               if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
+                       op = arm64.ALDADDALW
+               }
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               out := v.Reg0()
+               p := s.Prog(op)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = r0
+               p.RegTo2 = out
+               p1 := s.Prog(arm64.AADD)
+               p1.From.Type = obj.TYPE_REG
+               p1.From.Reg = r1
+               p1.To.Type = obj.TYPE_REG
+               p1.To.Reg = out
        case ssa.OpARM64LoweredAtomicCas64,
                ssa.OpARM64LoweredAtomicCas32:
                // LDAXR        (Rarg0), Rtmp
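
Note the trailing ADD: LDADDAL leaves the old contents of *Rarg0 in Rout,
while the Xadd intrinsics must return the new value, so the generated code
adds Rarg1 into Rout afterwards (new = old + delta, as in the sketch near the
top of this page).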
src/cmd/compile/internal/gc/go.go
index a471a909d65b1830db415c94bd0d2dd21c9ee664..95bf562e2c1afc986e52786a4ee8387dfe6af88f 100644
@@ -303,6 +303,7 @@ var (
        racewriterange,
        supportPopcnt,
        supportSSE41,
+       arm64SupportAtomics,
        typedmemclr,
        typedmemmove,
        Udiv,
src/cmd/compile/internal/gc/ssa.go
index 3c15c8e555a842e966b34e6ef7da02bf88f98332..92bfa7de4fb375300f73d3504e866f6d548d9fda 100644
@@ -78,6 +78,7 @@ func initssaconfig() {
        racewriterange = sysfunc("racewriterange")
        supportPopcnt = sysfunc("support_popcnt")
        supportSSE41 = sysfunc("support_sse41")
+       arm64SupportAtomics = sysfunc("arm64_support_atomics")
        typedmemclr = sysfunc("typedmemclr")
        typedmemmove = sysfunc("typedmemmove")
        Udiv = sysfunc("udiv")
@@ -2935,14 +2936,56 @@ func init() {
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
                },
-               sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.MIPS64, sys.PPC64)
+               sys.AMD64, sys.S390X, sys.MIPS, sys.MIPS64, sys.PPC64)
        addF("runtime/internal/atomic", "Xadd64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
                },
-               sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS64, sys.PPC64)
+               sys.AMD64, sys.S390X, sys.MIPS64, sys.PPC64)
+
+       makeXaddARM64 := func(op0 ssa.Op, op1 ssa.Op, ty types.EType) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+               return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       // The atomics CPU feature is detected dynamically at run time.
+                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64SupportAtomics, s.sb)
+                       v := s.load(types.Types[TBOOL], addr)
+                       b := s.endBlock()
+                       b.Kind = ssa.BlockIf
+                       b.SetControl(v)
+                       bTrue := s.f.NewBlock(ssa.BlockPlain)
+                       bFalse := s.f.NewBlock(ssa.BlockPlain)
+                       bEnd := s.f.NewBlock(ssa.BlockPlain)
+                       b.AddEdgeTo(bTrue)
+                       b.AddEdgeTo(bFalse)
+                       b.Likely = ssa.BranchUnlikely // most machines don't have the atomic instructions yet
+
+                       // We have the atomic instructions - use them directly.
+                       s.startBlock(bTrue)
+                       v0 := s.newValue3(op1, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v0)
+                       s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v0)
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Use original instruction sequence.
+                       s.startBlock(bFalse)
+                       v1 := s.newValue3(op0, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
+                       s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v1)
+                       s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v1)
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Merge results.
+                       s.startBlock(bEnd)
+                       return s.variable(n, types.Types[ty])
+               }
+       }
+
+       addF("runtime/internal/atomic", "Xadd",
+               makeXaddARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32),
+               sys.ARM64)
+       addF("runtime/internal/atomic", "Xadd64",
+               makeXaddARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64),
+               sys.ARM64)
 
        addF("runtime/internal/atomic", "Cas",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
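
makeXaddARM64 hand-builds the standard if/else diamond in SSA: bTrue and
bFalse each compute their result and record it under the node n via s.vars,
and the s.variable(n, ...) read in the merge block bEnd is what prompts the
SSA builder to insert a phi joining the two values. BranchUnlikely is only a
layout hint; both instruction sequences are present in every binary, and the
flag chooses between them at run time.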
src/cmd/compile/internal/ssa/gen/ARM64.rules
index a1a3cccf3c2529e3ced608573bfc7e8f1063172a..a7e747e6e70f5c36c942d5fce9b12118fd6050de 100644
 (AtomicAnd8 ptr val mem) -> (Select1 (LoweredAtomicAnd8 ptr val mem))
 (AtomicOr8  ptr val mem) -> (Select1 (LoweredAtomicOr8  ptr val mem))
 
+(AtomicAdd32Variant ptr val mem) -> (LoweredAtomicAdd32Variant ptr val mem)
+(AtomicAdd64Variant ptr val mem) -> (LoweredAtomicAdd64Variant ptr val mem)
+
 // Write barrier.
 (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
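
These two rules lower the generic variant ops one-for-one onto the machine
ops declared in ARM64Ops.go; the rewriteValueARM64_OpAtomicAdd32Variant_0 and
...Add64Variant_0 functions later in this CL are the generated form of exactly
these rules (rewriteARM64.go is machine-generated from this rules file).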
 
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9e8b07ec4b3ef1be8ee11630b19715aeb9ad8103..c87c18f3fb58701ccaf3b7de65288ea486e3096e 100644
@@ -578,6 +578,13 @@ func init() {
                {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
                {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
 
+               // atomic add variant.
+               // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
+               // LDADDAL      Rarg1, (Rarg0), Rout
+               // ADD          Rarg1, Rout
+               {name: "LoweredAtomicAdd64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+               {name: "LoweredAtomicAdd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+
                // atomic compare and swap.
                // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
                // if *arg0 == arg1 {
src/cmd/compile/internal/ssa/gen/genericOps.go
index 13581452e733701244f12e793f63cbd5217675c3..07d93ac07350cab8c890165a042df26096a17ba3 100644
@@ -515,6 +515,13 @@ var genericOps = []opData{
        {name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1.  arg2=memory.  Returns memory.
        {name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1.  arg2=memory.  Returns memory.
 
+       // Atomic operation variants
+       // These variants have the same semantics as the atomic operations above,
+       // but compile to more efficient code on machines that support newer
+       // instructions, selected by run-time CPU feature detection. ARM64 only for now.
+       {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+       {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1.  arg2=memory.  Returns sum and new memory.
+
        // Clobber experiment op
        {name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
 }
src/cmd/compile/internal/ssa/opGen.go
index eec5b027139ec83a1c747039d95d5803cfd0b1c7..01ce5e9e7d1c4b11bf69ba0ca766865a91c687b4 100644
@@ -1275,6 +1275,8 @@ const (
        OpARM64LoweredAtomicExchange32
        OpARM64LoweredAtomicAdd64
        OpARM64LoweredAtomicAdd32
+       OpARM64LoweredAtomicAdd64Variant
+       OpARM64LoweredAtomicAdd32Variant
        OpARM64LoweredAtomicCas64
        OpARM64LoweredAtomicCas32
        OpARM64LoweredAtomicAnd8
@@ -2287,6 +2289,8 @@ const (
        OpAtomicCompareAndSwap64
        OpAtomicAnd8
        OpAtomicOr8
+       OpAtomicAdd32Variant
+       OpAtomicAdd64Variant
        OpClobber
 )
 
@@ -16722,6 +16726,38 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:            "LoweredAtomicAdd64Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:            "LoweredAtomicAdd32Variant",
+               argLen:          3,
+               resultNotInArgs: true,
+               faultOnNilArg0:  true,
+               hasSideEffects:  true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:            "LoweredAtomicCas64",
                argLen:          4,
@@ -27825,6 +27861,18 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:           "AtomicAdd32Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
+       {
+               name:           "AtomicAdd64Variant",
+               argLen:         3,
+               hasSideEffects: true,
+               generic:        true,
+       },
        {
                name:      "Clobber",
                auxType:   auxSymOff,
src/cmd/compile/internal/ssa/rewriteARM64.go
index 60121038e4bdb09c4dd55437b59d5029a4a1f8cf..d039c731d379e56d395cc7090de59b65069bd410 100644
@@ -341,8 +341,12 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpAndB_0(v)
        case OpAtomicAdd32:
                return rewriteValueARM64_OpAtomicAdd32_0(v)
+       case OpAtomicAdd32Variant:
+               return rewriteValueARM64_OpAtomicAdd32Variant_0(v)
        case OpAtomicAdd64:
                return rewriteValueARM64_OpAtomicAdd64_0(v)
+       case OpAtomicAdd64Variant:
+               return rewriteValueARM64_OpAtomicAdd64Variant_0(v)
        case OpAtomicAnd8:
                return rewriteValueARM64_OpAtomicAnd8_0(v)
        case OpAtomicCompareAndSwap32:
@@ -25908,6 +25912,22 @@ func rewriteValueARM64_OpAtomicAdd32_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicAdd32Variant_0(v *Value) bool {
+       // match: (AtomicAdd32Variant ptr val mem)
+       // cond:
+       // result: (LoweredAtomicAdd32Variant ptr val mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64LoweredAtomicAdd32Variant)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
 func rewriteValueARM64_OpAtomicAdd64_0(v *Value) bool {
        // match: (AtomicAdd64 ptr val mem)
        // cond:
@@ -25924,6 +25944,22 @@ func rewriteValueARM64_OpAtomicAdd64_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpAtomicAdd64Variant_0(v *Value) bool {
+       // match: (AtomicAdd64Variant ptr val mem)
+       // cond:
+       // result: (LoweredAtomicAdd64Variant ptr val mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64LoweredAtomicAdd64Variant)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
 func rewriteValueARM64_OpAtomicAnd8_0(v *Value) bool {
        b := v.Block
        _ = b
src/cmd/internal/obj/arm64/a.out.go
index 8e725c6f2c2d1eeac9c0602abd0ecb7e8bedebf9..9be0183edf88019b1145f50845379c8002bff53e 100644
@@ -594,6 +594,8 @@ const (
        AHVC
        AIC
        AISB
+       ALDADDALD
+       ALDADDALW
        ALDADDB
        ALDADDH
        ALDADDW
src/cmd/internal/obj/arm64/anames.go
index 30be3b2732003e0a4e49b4a346efdcfde155b478..0579e5362ec5bd9fa7e4627664d1260178cea208 100644
@@ -96,6 +96,8 @@ var Anames = []string{
        "HVC",
        "IC",
        "ISB",
+       "LDADDALD",
+       "LDADDALW",
        "LDADDB",
        "LDADDH",
        "LDADDW",
src/cmd/internal/obj/arm64/asm7.go
index e7271437573efd2218d8b85a517ec8d3e6591fac..192d65df96c8423e544885603a8d1d7b720c2bfa 100644
@@ -2011,6 +2011,8 @@ func buildop(ctxt *obj.Link) {
                        oprangeset(ASWPB, t)
                        oprangeset(ASWPH, t)
                        oprangeset(ASWPW, t)
+                       oprangeset(ALDADDALD, t)
+                       oprangeset(ALDADDALW, t)
                        oprangeset(ALDADDB, t)
                        oprangeset(ALDADDH, t)
                        oprangeset(ALDADDW, t)
@@ -3363,9 +3365,9 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                rt := p.RegTo2
                rb := p.To.Reg
                switch p.As {
-               case ASWPD, ALDADDD, ALDANDD, ALDEORD, ALDORD: // 64-bit
+               case ASWPD, ALDADDALD, ALDADDD, ALDANDD, ALDEORD, ALDORD: // 64-bit
                        o1 = 3 << 30
-               case ASWPW, ALDADDW, ALDANDW, ALDEORW, ALDORW: // 32-bit
+               case ASWPW, ALDADDALW, ALDADDW, ALDANDW, ALDEORW, ALDORW: // 32-bit
                        o1 = 2 << 30
                case ASWPH, ALDADDH, ALDANDH, ALDEORH, ALDORH: // 16-bit
                        o1 = 1 << 30
@@ -3377,7 +3379,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                switch p.As {
                case ASWPD, ASWPW, ASWPH, ASWPB:
                        o1 |= 0x20 << 10
-               case ALDADDD, ALDADDW, ALDADDH, ALDADDB:
+               case ALDADDALD, ALDADDALW, ALDADDD, ALDADDW, ALDADDH, ALDADDB:
                        o1 |= 0x00 << 10
                case ALDANDD, ALDANDW, ALDANDH, ALDANDB:
                        o1 |= 0x04 << 10
@@ -3386,6 +3388,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                case ALDORD, ALDORW, ALDORH, ALDORB:
                        o1 |= 0x0c << 10
                }
+               switch p.As {
+               case ALDADDALD, ALDADDALW:
+                       o1 |= 3 << 22
+               }
                o1 |= 0x1c1<<21 | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31)
 
        case 50: /* sys/sysl */
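
The only genuinely new encoding work is the final switch: bits 23:22 of the
atomic-memory-op word select the ordering, and setting them to 11 (the 3<<22
term) turns the plain LDADD encoding into its acquire+release LDADDAL variant.
Below is a standalone sketch of the word this asmout case assembles, checked
against the two testdata encodings earlier in the CL (the expected byte
strings there are the little-endian layout of these words; the helper and its
layout are illustrative, not part of the toolchain):

        package main

        import "fmt"

        // ldaddal assembles LDADDAL{D,W} Rs, (Rb), Rt the way the asmout
        // case above does: size in bits 31:30, acquire/release in 23:22,
        // the fixed atomic-memory-op pattern 0x1c1 in bits 29:21, and the
        // three register fields in 20:16, 9:5 and 4:0.
        func ldaddal(size, rs, rb, rt uint32) uint32 {
                o1 := size << 30 // 3 = 64-bit (LDADDALD), 2 = 32-bit (LDADDALW)
                o1 |= 3 << 22    // acquire+release ordering (the AL suffix)
                o1 |= 0x00 << 10 // opcode group: LDADD
                o1 |= 0x1c1<<21 | (rs&31)<<16 | (rb&31)<<5 | rt&31
                return o1
        }

        func main() {
                // LDADDALD R2, (R1), R3 -> testdata bytes 2300e2f8
                fmt.Printf("%08x\n", ldaddal(3, 2, 1, 3)) // f8e20023
                // LDADDALW R5, (R4), R6 -> testdata bytes 8600e5b8
                fmt.Printf("%08x\n", ldaddal(2, 5, 4, 6)) // b8e50086
        }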
src/runtime/internal/atomic/bench_test.go
index 2a22e88fb80980ffb3ef883d1563a0cec23686d2..083a75cb0757cd25f5ade737af3b22d6b89790dc 100644
@@ -42,3 +42,23 @@ func BenchmarkAtomicStore(b *testing.B) {
                atomic.Store(&x, 0)
        }
 }
+
+func BenchmarkXadd(b *testing.B) {
+       var x uint32
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               for pb.Next() {
+                       atomic.Xadd(ptr, 1)
+               }
+       })
+}
+
+func BenchmarkXadd64(b *testing.B) {
+       var x uint64
+       ptr := &x
+       b.RunParallel(func(pb *testing.PB) {
+               for pb.Next() {
+                       atomic.Xadd64(ptr, 1)
+               }
+       })
+}
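
b.RunParallel spreads the loop across GOMAXPROCS goroutines, so these
benchmarks measure the contended case, which is where the single-instruction
LDADDAL wins big: an LDAXR/STLXR loop must retry whenever another core
intervenes. From a Go source checkout they can be run with something like:
go test -run=NONE -bench=Xadd runtime/internal/atomic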
src/runtime/proc.go
index 36c74a1e8c847300c3a909d892b8f8c43d414920..b5486321ed977e78ce350ed52b4184df3c239374 100644
@@ -517,6 +517,8 @@ func cpuinit() {
        support_popcnt = cpu.X86.HasPOPCNT
        support_sse2 = cpu.X86.HasSSE2
        support_sse41 = cpu.X86.HasSSE41
+
+       arm64_support_atomics = cpu.ARM64.HasATOMICS
 }
 
 // The bootstrap sequence is:
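
On linux/arm64, cpu.ARM64.HasATOMICS is derived by the internal cpu package
from the AT_HWCAP auxiliary vector (the HWCAP_ATOMICS bit), so the flag
reflects the feature set the kernel advertises rather than a trap-and-probe
at startup.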
src/runtime/runtime2.go
index 1ac008382851ea375d6cb22f7d10a269da9fc379..a3193b63c5fc3edd3b9af4b00feef2b426aebcb6 100644
@@ -840,10 +840,13 @@ var (
        processorVersionInfo uint32
        isIntel              bool
        lfenceBeforeRdtsc    bool
+
+       // Set in runtime.cpuinit.
        support_erms         bool
        support_popcnt       bool
        support_sse2         bool
        support_sse41        bool
+       arm64_support_atomics      bool
 
        goarm                uint8 // set by cmd/link on arm systems
        framepointer_enabled bool  // set by cmd/link