From: fanzha02
Date: Wed, 10 Apr 2024 08:45:02 +0000 (+0000)
Subject: cmd/compile: intrinsify atomic And/Or on arm64
X-Git-Tag: go1.23rc1~157
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=63c1e141bccff884dd9cab51a46c42c1a058fbe2;p=gostls13.git

cmd/compile: intrinsify atomic And/Or on arm64

The atomic And/Or operators were added in CL 528797, but the compiler
does not yet intrinsify them. This CL does so for arm64.

Also, the pre-existing atomic And/Or operations never used their
updated value, yet at the time a result register was still needed to
hold it temporarily. Now that we have v.RegTmp, the temporary no longer
has to be the result register, so this CL switches the lowered
operations over to it. The other change is that these operations now
return the old value of the memory location rather than the new one,
since that is what the new operators require.

Finally, this CL aliases all of the And/Or operations into the
sync/atomic package.

Performance on an ARMv8.1 machine:

                     old.txt        new.txt
                      sec/op         sec/op       vs base
And32-160            8.716n ± 0%    4.771n ± 1%   -45.26% (p=0.000 n=10)
And32Parallel-160    30.58n ± 2%    26.45n ± 4%   -13.49% (p=0.000 n=10)
And64-160            8.750n ± 1%    4.754n ± 0%   -45.67% (p=0.000 n=10)
And64Parallel-160    29.40n ± 3%    25.55n ± 5%   -13.11% (p=0.000 n=10)
Or32-160             8.847n ± 1%    4.754n ± 1%   -46.26% (p=0.000 n=10)
Or32Parallel-160     30.75n ± 3%    26.10n ± 4%   -15.14% (p=0.000 n=10)
Or64-160             8.825n ± 1%    4.766n ± 0%   -46.00% (p=0.000 n=10)
Or64Parallel-160     30.52n ± 5%    25.89n ± 6%   -15.17% (p=0.000 n=10)

For #61395

Change-Id: Ib1d1ac83f7f67dcf67f74d003fadb0f80932b826
Reviewed-on: https://go-review.googlesource.com/c/go/+/584715
Auto-Submit: Austin Clements
TryBot-Bypass: Austin Clements
Reviewed-by: Austin Clements
Reviewed-by: Cherry Mui
Run-TryBot: Fannie Zhang
LUCI-TryBot-Result: Go LUCI
---

diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 27b4e881c0..900e7016a3 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -781,23 +781,30 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p3.To.Type = obj.TYPE_REG
 		p3.To.Reg = out
-	case ssa.OpARM64LoweredAtomicAnd8,
+	case ssa.OpARM64LoweredAtomicAnd64,
+		ssa.OpARM64LoweredAtomicOr64,
 		ssa.OpARM64LoweredAtomicAnd32,
-		ssa.OpARM64LoweredAtomicOr8,
-		ssa.OpARM64LoweredAtomicOr32:
-		// LDAXRB/LDAXRW (Rarg0), Rout
-		// AND/OR Rarg1, Rout
-		// STLXRB/STLXRB Rout, (Rarg0), Rtmp
+		ssa.OpARM64LoweredAtomicOr32,
+		ssa.OpARM64LoweredAtomicAnd8,
+		ssa.OpARM64LoweredAtomicOr8:
+		// LDAXR[BW] (Rarg0), Rout
+		// AND/OR Rarg1, Rout, tmp1
+		// STLXR[BW] tmp1, (Rarg0), Rtmp
 		// CBNZ Rtmp, -3(PC)
-		ld := arm64.ALDAXRB
-		st := arm64.ASTLXRB
+		ld := arm64.ALDAXR
+		st := arm64.ASTLXR
 		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
 			ld = arm64.ALDAXRW
 			st = arm64.ASTLXRW
 		}
+		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
+			ld = arm64.ALDAXRB
+			st = arm64.ASTLXRB
+		}
 		r0 := v.Args[0].Reg()
 		r1 := v.Args[1].Reg()
 		out := v.Reg0()
+		tmp := v.RegTmp()
 		p := s.Prog(ld)
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = r0
@@ -806,11 +813,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p1 := s.Prog(v.Op.Asm())
 		p1.From.Type = obj.TYPE_REG
 		p1.From.Reg = r1
+		p1.Reg = out
 		p1.To.Type = obj.TYPE_REG
-		p1.To.Reg = out
+		p1.To.Reg = tmp
 		p2 := s.Prog(st)
 		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = out
+		p2.From.Reg = tmp
 		p2.To.Type = obj.TYPE_MEM
 		p2.To.Reg = r0
 		p2.RegTo2 = arm64.REGTMP
@@ -819,9 +827,14 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p3.From.Reg = arm64.REGTMP
 		p3.To.Type = obj.TYPE_BRANCH
 		p3.To.SetTarget(p)
+
 	case ssa.OpARM64LoweredAtomicAnd8Variant,
-		ssa.OpARM64LoweredAtomicAnd32Variant:
-		atomic_clear := arm64.ALDCLRALW
+		ssa.OpARM64LoweredAtomicAnd32Variant,
+		ssa.OpARM64LoweredAtomicAnd64Variant:
+		atomic_clear := arm64.ALDCLRALD
+		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
+			atomic_clear = arm64.ALDCLRALW
+		}
 		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
 			atomic_clear = arm64.ALDCLRALB
 		}
@@ -836,7 +849,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = arm64.REGTMP
 
-		// LDCLRALW Rtemp, (Rarg0), Rout
+		// LDCLRAL[BDW] Rtemp, (Rarg0), Rout
 		p1 := s.Prog(atomic_clear)
 		p1.From.Type = obj.TYPE_REG
 		p1.From.Reg = arm64.REGTMP
@@ -844,16 +857,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p1.To.Reg = r0
 		p1.RegTo2 = out
 
-		// AND Rarg1, Rout
-		p2 := s.Prog(arm64.AAND)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = r1
-		p2.To.Type = obj.TYPE_REG
-		p2.To.Reg = out
-
 	case ssa.OpARM64LoweredAtomicOr8Variant,
-		ssa.OpARM64LoweredAtomicOr32Variant:
-		atomic_or := arm64.ALDORALW
+		ssa.OpARM64LoweredAtomicOr32Variant,
+		ssa.OpARM64LoweredAtomicOr64Variant:
+		atomic_or := arm64.ALDORALD
+		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
+			atomic_or = arm64.ALDORALW
+		}
 		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
 			atomic_or = arm64.ALDORALB
 		}
@@ -861,7 +871,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		r1 := v.Args[1].Reg()
 		out := v.Reg0()
 
-		// LDORALW Rarg1, (Rarg0), Rout
+		// LDORAL[BDW] Rarg1, (Rarg0), Rout
 		p := s.Prog(atomic_or)
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = r1
@@ -869,13 +879,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Reg = r0
 		p.RegTo2 = out
 
-		// ORR Rarg1, Rout
-		p2 := s.Prog(arm64.AORR)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = r1
-		p2.To.Type = obj.TYPE_REG
-		p2.To.Reg = out
-
 	case ssa.OpARM64MOVBreg,
 		ssa.OpARM64MOVBUreg,
 		ssa.OpARM64MOVHreg,
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 18a6586fb0..1b588edb04 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -579,11 +579,11 @@
 (AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...)
 (AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
 
-// Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem))
-(AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem))
-(AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem))
-(AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem))
+// Return old contents.
+(AtomicAnd(64|32|8) ...) => (LoweredAtomicAnd(64|32|8) ...)
+(AtomicOr(64|32|8) ...) => (LoweredAtomicOr(64|32|8) ...)
+(AtomicAnd(64|32|8)Variant ...) => (LoweredAtomicAnd(64|32|8)Variant ...)
+(AtomicOr(64|32|8)Variant ...) => (LoweredAtomicOr(64|32|8)Variant ...)
 
 // Write barrier.
 (WB ...) => (LoweredWB ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
index 5a98aa0c54..fa18b674cc 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
@@ -707,29 +707,31 @@ func init() {
 		{name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
 		// atomic and/or.
-		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0>. auxint must be zero.
+		// *arg0 &= (|=) arg1. arg2=mem. returns <old contents of *arg0>. auxint must be zero.
 		//   LDAXR	(Rarg0), Rout
-		//   AND/OR	Rarg1, Rout
-		//   STLXR	Rout, (Rarg0), Rtmp
+		//   AND/OR	Rarg1, Rout, tempReg
+		//   STLXR	tempReg, (Rarg0), Rtmp
 		//   CBNZ	Rtmp, -3(PC)
-		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
-		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
-		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
-		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
+		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
+		{name: "LoweredAtomicAnd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
+		{name: "LoweredAtomicOr64", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
+		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
+		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
 
 		// atomic and/or variant.
-		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0>. auxint must be zero.
+		// *arg0 &= (|=) arg1. arg2=mem. returns <old contents of *arg0>. auxint must be zero.
 		//   AND:
 		//   MNV	Rarg1, Rtemp
 		//   LDANDALB	Rtemp, (Rarg0), Rout
-		//   AND	Rarg1, Rout
 		//   OR:
 		//   LDORALB	Rarg1, (Rarg0), Rout
-		//   ORR	Rarg1, Rout
-		{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
-		{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
-		{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
-		{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicAnd64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
 		// It saves all GP registers if necessary,
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 637e7b617c..6805408b46 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -609,25 +609,31 @@ var genericOps = []opData{
 	{name: "AtomicCompareAndSwap32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true},    // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
 	{name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true},    // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
 	{name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory.
-	{name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true},                       // *arg0 &= arg1. arg2=memory. Returns memory.
-	{name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true},                      // *arg0 &= arg1. arg2=memory. Returns memory.
-	{name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true},                        // *arg0 |= arg1. arg2=memory. Returns memory.
-	{name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true},                       // *arg0 |= arg1. arg2=memory. Returns memory.
+	{name: "AtomicAnd8", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true},              // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr8", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true},               // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicAnd64", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true},            // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicAnd32", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true},            // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr64", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true},             // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr32", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true},             // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
 
 	// Atomic operation variants
 	// These variants have the same semantics as above atomic operations.
 	// But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
-	// Currently, they are used on ARM64 only.
+	// On ARM64, these are used when the LSE hardware feature is available (either known at compile time or detected at runtime). If LSE is not available,
+	// then the basic atomic operations are used instead.
+	// These are not currently used on any other platform.
 
 	{name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},          // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
 	{name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},          // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
 	{name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},     // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
 	{name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},     // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
 	{name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
 	{name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
-	{name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1. arg2=memory. Returns memory.
-	{name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                   // *arg0 &= arg1. arg2=memory. Returns memory.
-	{name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1. arg2=memory. Returns memory.
-	{name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 |= arg1. arg2=memory. Returns memory.
+	{name: "AtomicAnd8Variant", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true},           // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr8Variant", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true},            // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicAnd64Variant", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true},         // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr64Variant", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true},          // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicAnd32Variant", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true},         // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicOr32Variant", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true},          // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
 
 	// Publication barrier
 	{name: "PubBarrier", argLength: 1, hasSideEffects: true}, // Do data barrier. arg0=memory.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index b07d515bd1..847d62c0a5 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1717,12 +1717,16 @@ const (
 	OpARM64LoweredAtomicCas64Variant
 	OpARM64LoweredAtomicCas32Variant
 	OpARM64LoweredAtomicAnd8
-	OpARM64LoweredAtomicAnd32
 	OpARM64LoweredAtomicOr8
+	OpARM64LoweredAtomicAnd64
+	OpARM64LoweredAtomicOr64
+	OpARM64LoweredAtomicAnd32
 	OpARM64LoweredAtomicOr32
 	OpARM64LoweredAtomicAnd8Variant
-	OpARM64LoweredAtomicAnd32Variant
 	OpARM64LoweredAtomicOr8Variant
+	OpARM64LoweredAtomicAnd64Variant
+	OpARM64LoweredAtomicOr64Variant
+	OpARM64LoweredAtomicAnd32Variant
 	OpARM64LoweredAtomicOr32Variant
 	OpARM64LoweredWB
 	OpARM64LoweredPanicBoundsA
@@ -3226,8 +3230,10 @@ const (
 	OpAtomicCompareAndSwap64
 	OpAtomicCompareAndSwapRel32
 	OpAtomicAnd8
-	OpAtomicAnd32
 	OpAtomicOr8
+	OpAtomicAnd64
+	OpAtomicAnd32
+	OpAtomicOr64
 	OpAtomicOr32
 	OpAtomicAdd32Variant
 	OpAtomicAdd64Variant
@@ -3236,8 +3242,10 @@ const (
 	OpAtomicCompareAndSwap32Variant
 	OpAtomicCompareAndSwap64Variant
 	OpAtomicAnd8Variant
-	OpAtomicAnd32Variant
 	OpAtomicOr8Variant
+	OpAtomicAnd64Variant
+	OpAtomicOr64Variant
+	OpAtomicAnd32Variant
 	OpAtomicOr32Variant
 	OpPubBarrier
 	OpClobber
@@ -23000,6 +23008,7 @@ var opcodeTable = [...]opInfo{
 		name:            "LoweredAtomicAnd8",
 		argLen:          3,
 		resultNotInArgs: true,
+		needIntTemp:     true,
 		faultOnNilArg0:  true,
 		hasSideEffects:  true,
 		unsafePoint:     true,
@@ -23015,9 +23024,29 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:            "LoweredAtomicAnd32",
+		name:            "LoweredAtomicOr8",
+		argLen:          3,
+		resultNotInArgs: true,
+		needIntTemp:     true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		asm:             arm64.AORR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicAnd64",
 		argLen:          3,
 		resultNotInArgs: true,
+		needIntTemp:     true,
 		faultOnNilArg0:  true,
 		hasSideEffects:  true,
 		unsafePoint:     true,
@@ -23033,9 +23062,10 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:            "LoweredAtomicOr8",
+		name:            "LoweredAtomicOr64",
 		argLen:          3,
 		resultNotInArgs: true,
+		needIntTemp:     true,
 		faultOnNilArg0:  true,
 		hasSideEffects:  true,
 		unsafePoint:     true,
@@ -23050,10 +23080,30 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicAnd32",
+		argLen:          3,
+		resultNotInArgs: true,
+		needIntTemp:     true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		asm:             arm64.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:            "LoweredAtomicOr32",
 		argLen:          3,
 		resultNotInArgs: true,
+		needIntTemp:     true,
 		faultOnNilArg0:  true,
 		hasSideEffects:  true,
 		unsafePoint:     true,
@@ -23086,7 +23136,23 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:            "LoweredAtomicAnd32Variant",
+		name:            "LoweredAtomicOr8Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicAnd64Variant",
 		argLen:          3,
 		resultNotInArgs: true,
 		faultOnNilArg0:  true,
@@ -23103,7 +23169,7 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:            "LoweredAtomicOr8Variant",
+		name:            "LoweredAtomicOr64Variant",
 		argLen:          3,
 		resultNotInArgs: true,
 		faultOnNilArg0:  true,
@@ -23118,6 +23184,23 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicAnd32Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:            "LoweredAtomicOr32Variant",
 		argLen:          3,
@@ -40649,6 +40732,18 @@ var opcodeTable = [...]opInfo{
 		hasSideEffects: true,
 		generic:        true,
 	},
+	{
+		name:           "AtomicOr8",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd64",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
 	{
 		name:           "AtomicAnd32",
 		argLen:         3,
@@ -40656,7 +40751,7 @@ var opcodeTable = [...]opInfo{
 		hasSideEffects: true,
 		generic:        true,
 	},
 	{
-		name:           "AtomicOr8",
+		name:           "AtomicOr64",
 		argLen:         3,
 		hasSideEffects: true,
 		generic:        true,
@@ -40710,13 +40805,25 @@ var opcodeTable = [...]opInfo{
 		generic:        true,
 	},
 	{
-		name:           "AtomicAnd32Variant",
+		name:           "AtomicOr8Variant",
 		argLen:         3,
 		hasSideEffects: true,
 		generic:        true,
 	},
 	{
-		name:           "AtomicOr8Variant",
+		name:           "AtomicAnd64Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicOr64Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd32Variant",
 		argLen:         3,
 		hasSideEffects: true,
 		generic:        true,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 8f60f023b1..a548f6bd97 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -470,13 +470,23 @@ func rewriteValueARM64(v *Value) bool {
 		v.Op = OpARM64LoweredAtomicAdd64Variant
 		return true
 	case OpAtomicAnd32:
-		return rewriteValueARM64_OpAtomicAnd32(v)
+		v.Op = OpARM64LoweredAtomicAnd32
+		return true
 	case OpAtomicAnd32Variant:
-		return rewriteValueARM64_OpAtomicAnd32Variant(v)
+		v.Op = OpARM64LoweredAtomicAnd32Variant
+		return true
+	case OpAtomicAnd64:
+		v.Op = OpARM64LoweredAtomicAnd64
+		return true
+	case OpAtomicAnd64Variant:
+		v.Op = OpARM64LoweredAtomicAnd64Variant
+		return true
 	case OpAtomicAnd8:
-		return rewriteValueARM64_OpAtomicAnd8(v)
+		v.Op = OpARM64LoweredAtomicAnd8
+		return true
 	case OpAtomicAnd8Variant:
-		return rewriteValueARM64_OpAtomicAnd8Variant(v)
+		v.Op = OpARM64LoweredAtomicAnd8Variant
+		return true
 	case OpAtomicCompareAndSwap32:
 		v.Op = OpARM64LoweredAtomicCas32
 		return true
@@ -514,13 +524,23 @@ func rewriteValueARM64(v *Value) bool {
 		v.Op = OpARM64LDAR
 		return true
 	case OpAtomicOr32:
-		return rewriteValueARM64_OpAtomicOr32(v)
+		v.Op = OpARM64LoweredAtomicOr32
+		return true
 	case OpAtomicOr32Variant:
-		return rewriteValueARM64_OpAtomicOr32Variant(v)
+		v.Op = OpARM64LoweredAtomicOr32Variant
+		return true
+	case OpAtomicOr64:
+		v.Op = OpARM64LoweredAtomicOr64
+		return true
+	case OpAtomicOr64Variant:
+		v.Op = OpARM64LoweredAtomicOr64Variant
+		return true
 	case OpAtomicOr8:
-		return rewriteValueARM64_OpAtomicOr8(v)
+		v.Op = OpARM64LoweredAtomicOr8
+		return true
 	case OpAtomicOr8Variant:
-		return rewriteValueARM64_OpAtomicOr8Variant(v)
+		v.Op = OpARM64LoweredAtomicOr8Variant
+		return true
 	case OpAtomicStore32:
 		v.Op = OpARM64STLRW
 		return true
@@ -17783,158 +17803,6 @@ func rewriteValueARM64_OpAddr(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicAnd32 ptr val mem)
-	// result: (Select1 (LoweredAtomicAnd32 ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32, types.NewTuple(typ.UInt32, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicAnd32Variant ptr val mem)
-	// result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicAnd8 ptr val mem)
-	// result: (Select1 (LoweredAtomicAnd8 ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8, types.NewTuple(typ.UInt8, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicAnd8Variant ptr val mem)
-	// result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicOr32 ptr val mem)
-	// result: (Select1 (LoweredAtomicOr32 ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32, types.NewTuple(typ.UInt32, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicOr32Variant ptr val mem)
-	// result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicOr8 ptr val mem)
-	// result: (Select1 (LoweredAtomicOr8 ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8, types.NewTuple(typ.UInt8, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
-func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	typ := &b.Func.Config.Types
-	// match: (AtomicOr8Variant ptr val mem)
-	// result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
-	for {
-		ptr := v_0
-		val := v_1
-		mem := v_2
-		v.reset(OpSelect1)
-		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
-		v0.AddArg3(ptr, val, mem)
-		v.AddArg(v0)
-		return true
-	}
-}
 func rewriteValueARM64_OpAvg64u(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 596d2e75dd..0d2693ea33 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -4451,16 +4451,16 @@ func InitTables() {
 		}
 	}
 
-	atomicXchgXaddEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
+	atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
 		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
 		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 		s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
 	}
 	addF("internal/runtime/atomic", "Xchg",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicXchgXaddEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
 	addF("internal/runtime/atomic", "Xchg64",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicXchgXaddEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
 		sys.ARM64)
 
 	addF("internal/runtime/atomic", "Xadd",
@@ -4479,10 +4479,10 @@ func InitTables() {
 		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 
 	addF("internal/runtime/atomic", "Xadd",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicXchgXaddEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
 	addF("internal/runtime/atomic", "Xadd64",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicXchgXaddEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
 		sys.ARM64)
 
 	addF("internal/runtime/atomic", "Cas",
@@ -4545,21 +4545,29 @@ func InitTables() {
 		},
 		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 
-	atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
-		s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
-	}
-
 	addF("internal/runtime/atomic", "And8",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, types.TNIL, atomicAndOrEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, types.TUINT8, atomicEmitterARM64),
+		sys.ARM64)
+	addF("internal/runtime/atomic", "Or8",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, types.TUINT8, atomicEmitterARM64),
+		sys.ARM64)
+	addF("internal/runtime/atomic", "And64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64, ssa.OpAtomicAnd64Variant, types.TUINT64, atomicEmitterARM64),
+		sys.ARM64)
+	addF("internal/runtime/atomic", "And32",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
 	addF("internal/runtime/atomic", "And",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TNIL, atomicAndOrEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
-	addF("internal/runtime/atomic", "Or8",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, types.TNIL, atomicAndOrEmitterARM64),
+	addF("internal/runtime/atomic", "Or64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64, ssa.OpAtomicOr64Variant, types.TUINT64, atomicEmitterARM64),
+		sys.ARM64)
+	addF("internal/runtime/atomic", "Or32",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
 	addF("internal/runtime/atomic", "Or",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TNIL, atomicAndOrEmitterARM64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TUINT32, atomicEmitterARM64),
 		sys.ARM64)
 
 	// Aliases for atomic load operations
@@ -4609,6 +4617,10 @@ func InitTables() {
 	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...)
 	alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...)
 
+	// Aliases for atomic And/Or operations
+	alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64)
+	alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64)
+
 	/******** math ********/
 	addF("math", "sqrt",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
@@ -5085,6 +5097,17 @@ func InitTables() {
 	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...)
 	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...)
 
+ alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64) + alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64) + alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64) + alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64) + alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64) + alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64) + alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64) + alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64) + alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64) + alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64) + /******** math/big ********/ alias("math/big", "mulWW", "math/bits", "Mul64", p8...) }