From: Michael Pratt
Date: Fri, 16 Oct 2020 20:34:52 +0000 (-0400)
Subject: cmd/compile: intrinsify runtime/internal/atomic.{And,Or} on ARM64
X-Git-Tag: go1.16beta1~581
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=cdb19b4dba58c0e3cabde8b728156dfe273707b3;p=gostls13.git

cmd/compile: intrinsify runtime/internal/atomic.{And,Or} on ARM64

These are identical to And8 and Or8, just using LDAXRW/STLXRW instead of LDAXRB/STLXRB.

Change-Id: I5308832ae165064550bee4bb245809ab952f4cc8
Reviewed-on: https://go-review.googlesource.com/c/go/+/263148
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 1d6ea6b9d8..5c695ef84c 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -688,15 +688,23 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p5.To.Reg = out
 		gc.Patch(p2, p5)
 	case ssa.OpARM64LoweredAtomicAnd8,
-		ssa.OpARM64LoweredAtomicOr8:
-		// LDAXRB (Rarg0), Rout
+		ssa.OpARM64LoweredAtomicAnd32,
+		ssa.OpARM64LoweredAtomicOr8,
+		ssa.OpARM64LoweredAtomicOr32:
+		// LDAXRB/LDAXRW (Rarg0), Rout
 		// AND/OR Rarg1, Rout
-		// STLXRB Rout, (Rarg0), Rtmp
+		// STLXRB/STLXRW Rout, (Rarg0), Rtmp
 		// CBNZ Rtmp, -3(PC)
+		ld := arm64.ALDAXRB
+		st := arm64.ASTLXRB
+		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
+			ld = arm64.ALDAXRW
+			st = arm64.ASTLXRW
+		}
 		r0 := v.Args[0].Reg()
 		r1 := v.Args[1].Reg()
 		out := v.Reg0()
-		p := s.Prog(arm64.ALDAXRB)
+		p := s.Prog(ld)
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = r0
 		p.To.Type = obj.TYPE_REG
@@ -706,7 +714,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p1.From.Reg = r1
 		p1.To.Type = obj.TYPE_REG
 		p1.To.Reg = out
-		p2 := s.Prog(arm64.ASTLXRB)
+		p2 := s.Prog(st)
 		p2.From.Type = obj.TYPE_REG
 		p2.From.Reg = out
 		p2.To.Type = obj.TYPE_MEM
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 10d0d5fb56..1561fe2106 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3546,7 +3546,7 @@ func init() {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64) // TODO: same arches as And8.
+		sys.AMD64, sys.ARM64) // TODO: same arches as And8.
 	addF("runtime/internal/atomic", "Or8",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
@@ -3558,7 +3558,7 @@ func init() {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64) // TODO: same arches as Or8.
+		sys.AMD64, sys.ARM64) // TODO: same arches as Or8.
 	alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
 	alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c4a3532632..f2d2fb6cf6 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -548,8 +548,10 @@ (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
 
 // Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicOr8  ptr val mem) => (Select1 (LoweredAtomicOr8  ptr val mem))
+(AtomicAnd8  ptr val mem) => (Select1 (LoweredAtomicAnd8  ptr val mem))
+(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
+(AtomicOr8   ptr val mem) => (Select1 (LoweredAtomicOr8   ptr val mem))
+(AtomicOr32  ptr val mem) => (Select1 (LoweredAtomicOr32  ptr val mem))
 
 (AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9ff53f7e4e..fe9edbf933 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -656,12 +656,14 @@ func init() {
 		// atomic and/or.
 		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
-		// LDAXRB (Rarg0), Rout
+		// LDAXR (Rarg0), Rout
 		// AND/OR Rarg1, Rout
-		// STLXRB Rout, (Rarg0), Rtmp
+		// STLXR Rout, (Rarg0), Rtmp
 		// CBNZ Rtmp, -3(PC)
 		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It saves all GP registers if necessary,
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 00efc8f38d..f86210e631 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1588,7 +1588,9 @@ const (
 	OpARM64LoweredAtomicCas64
 	OpARM64LoweredAtomicCas32
 	OpARM64LoweredAtomicAnd8
+	OpARM64LoweredAtomicAnd32
 	OpARM64LoweredAtomicOr8
+	OpARM64LoweredAtomicOr32
 	OpARM64LoweredWB
 	OpARM64LoweredPanicBoundsA
 	OpARM64LoweredPanicBoundsB
@@ -21096,6 +21098,24 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name: "LoweredAtomicAnd32",
+		argLen: 3,
+		resultNotInArgs: true,
+		faultOnNilArg0: true,
+		hasSideEffects: true,
+		unsafePoint: true,
+		asm: arm64.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name: "LoweredAtomicOr8",
 		argLen: 3,
 		resultNotInArgs: true,
 		faultOnNilArg0: true,
 		hasSideEffects: true,
 		unsafePoint: true,
 		asm: arm64.AORR,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
 				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
 			},
 			outputs: []outputInfo{
 				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
 			},
 		},
 	},
+	{
+		name: "LoweredAtomicOr32",
+		argLen: 3,
+		resultNotInArgs: true,
+		faultOnNilArg0: true,
+		hasSideEffects: true,
+		unsafePoint: true,
+		asm: arm64.AORR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name: "LoweredWB",
 		auxType: auxSym,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6c48812121..327f1674b5 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -424,6 +424,8 @@ func rewriteValueARM64(v *Value) bool {
 	case OpAtomicAdd64Variant:
 		v.Op = OpARM64LoweredAtomicAdd64Variant
 		return true
+	case OpAtomicAnd32:
+		return rewriteValueARM64_OpAtomicAnd32(v)
 	case OpAtomicAnd8:
 		return rewriteValueARM64_OpAtomicAnd8(v)
 	case OpAtomicCompareAndSwap32:
@@ -450,6 +452,8 @@ func rewriteValueARM64(v *Value) bool {
 	case OpAtomicLoadPtr:
 		v.Op = OpARM64LDAR
 		return true
+	case OpAtomicOr32:
+		return rewriteValueARM64_OpAtomicOr32(v)
 	case OpAtomicOr8:
 		return rewriteValueARM64_OpAtomicOr8(v)
 	case OpAtomicStore32:
@@ -21340,6 +21344,25 @@ func rewriteValueARM64_OpAddr(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd32 ptr val mem)
+	// result: (Select1 (LoweredAtomicAnd32 ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21359,6 +21382,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr32 ptr val mem)
+	// result: (Select1 (LoweredAtomicOr32 ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]