From 44dbeaf35600ae70f3e6296914ea31147d5f010c Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 9 Oct 2020 12:41:50 -0400 Subject: [PATCH] cmd/compile: intrinsify runtime/internal/atomic.{And,Or} on AMD64 These are identical to And8 and Or8, just using ANDL/ORL instead of ANDB/ORB. Change-Id: I99cf90a8b0b5f211fb23325dddd55821875f0c8f Reviewed-on: https://go-review.googlesource.com/c/go/+/263140 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Trust: Michael Pratt Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/amd64/ssa.go | 2 +- src/cmd/compile/internal/gc/ssa.go | 12 +++++ src/cmd/compile/internal/ssa/gen/AMD64.rules | 6 ++- src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 2 + .../compile/internal/ssa/gen/genericOps.go | 2 + src/cmd/compile/internal/ssa/opGen.go | 48 +++++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 34 +++++++++++++ 7 files changed, 103 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 4ac877986c..f30a47b903 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -1210,7 +1210,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p = s.Prog(x86.ASETEQ) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() - case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock: + case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock: s.Prog(x86.ALOCK) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 65beb84911..10d0d5fb56 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3541,12 +3541,24 @@ func init() { return nil }, sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X) + addF("runtime/internal/atomic", "And", + func(s *state, n *Node, args []*ssa.Value) *ssa.Value { + s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64) // TODO: same arches as And8. addF("runtime/internal/atomic", "Or8", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem()) return nil }, sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X) + addF("runtime/internal/atomic", "Or", + func(s *state, n *Node, args []*ssa.Value) *ssa.Value { + s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64) // TODO: same arches as Or8. alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...) alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 8a253035e0..b9b29a489d 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -530,8 +530,10 @@ (AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem) // Atomic memory updates. -(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem) -(AtomicOr8 ptr val mem) => (ORBlock ptr val mem) +(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem) +(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem) +(AtomicOr8 ptr val mem) => (ORBlock ptr val mem) +(AtomicOr32 ptr val mem) => (ORLlock ptr val mem) // Write barrier. (WB ...) => (LoweredWB ...) 
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 2df5016d59..de5372670b 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -902,7 +902,9 @@ func init() { // Atomic memory updates. {name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 + {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 {name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 + {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 } var AMD64blocks = []blockData{ diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 12ba9f1fc9..23bd4af2cd 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -565,7 +565,9 @@ var genericOps = []opData{ {name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory. {name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory. {name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. + {name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. {name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. + {name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. // Atomic operation variants // These variants have the same semantics as above atomic operations. 
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index e9c63cdddf..00efc8f38d 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1034,7 +1034,9 @@ const ( OpAMD64CMPXCHGLlock OpAMD64CMPXCHGQlock OpAMD64ANDBlock + OpAMD64ANDLlock OpAMD64ORBlock + OpAMD64ORLlock OpARMADD OpARMADDconst @@ -2854,7 +2856,9 @@ const ( OpAtomicCompareAndSwap64 OpAtomicCompareAndSwapRel32 OpAtomicAnd8 + OpAtomicAnd32 OpAtomicOr8 + OpAtomicOr32 OpAtomicAdd32Variant OpAtomicAdd64Variant OpClobber @@ -13575,6 +13579,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ANDLlock", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + hasSideEffects: true, + symEffect: SymRdWr, + asm: x86.AANDL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, { name: "ORBlock", auxType: auxSymOff, @@ -13591,6 +13611,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ORLlock", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + hasSideEffects: true, + symEffect: SymRdWr, + asm: x86.AORL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, { name: "ADD", @@ -35520,12 +35556,24 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, generic: true, }, + { + name: "AtomicAnd32", + argLen: 3, + hasSideEffects: true, + generic: true, + }, { name: "AtomicOr8", argLen: 3, hasSideEffects: true, generic: true, }, + { + name: "AtomicOr32", + argLen: 3, + hasSideEffects: true, + generic: true, + }, { name: "AtomicAdd32Variant", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 32ef26f98d..15bb627450 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -572,6 +572,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAtomicAdd32(v) case OpAtomicAdd64: return rewriteValueAMD64_OpAtomicAdd64(v) + case OpAtomicAnd32: + return rewriteValueAMD64_OpAtomicAnd32(v) case OpAtomicAnd8: return rewriteValueAMD64_OpAtomicAnd8(v) case OpAtomicCompareAndSwap32: @@ -590,6 +592,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAtomicLoad8(v) case OpAtomicLoadPtr: return rewriteValueAMD64_OpAtomicLoadPtr(v) + case OpAtomicOr32: + return rewriteValueAMD64_OpAtomicOr32(v) case OpAtomicOr8: return rewriteValueAMD64_OpAtomicOr8(v) case OpAtomicStore32: @@ -28476,6 +28480,21 @@ func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool { return true } } +func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (AtomicAnd32 ptr val mem) + // result: (ANDLlock ptr val mem) + for { + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ANDLlock) + v.AddArg3(ptr, val, mem) + return true + } +} func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -28607,6 +28626,21 @@ func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool { return true } } +func rewriteValueAMD64_OpAtomicOr32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (AtomicOr32 ptr val mem) + // result: (ORLlock ptr val mem) + for { + ptr := v_0 + val := v_1 
+ mem := v_2 + v.reset(OpAMD64ORLlock) + v.AddArg3(ptr, val, mem) + return true + } +} func rewriteValueAMD64_OpAtomicOr8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] -- 2.48.1
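
For context, here is a minimal sketch (not part of the patch) of the semantics that the new AtomicAnd32/AtomicOr32 ops provide, written against the public sync/atomic API. The helper names and32/or32 are illustrative only; the real runtime/internal/atomic.And and Or are assembly/intrinsic implementations, and with this change the AMD64 backend emits a single LOCK ANDL / LOCK ORL for them instead of a compare-and-swap loop like the one below.

package main

import (
	"fmt"
	"sync/atomic"
)

// and32 atomically performs *addr &= mask, mirroring the signature and
// behavior of runtime/internal/atomic.And (no value is returned).
func and32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old&mask) {
			return
		}
	}
}

// or32 atomically performs *addr |= mask, mirroring runtime/internal/atomic.Or.
func or32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old|mask) {
			return
		}
	}
}

func main() {
	var flags uint32 = 0b1010
	or32(&flags, 0b0101)        // set bits  -> 0b1111
	and32(&flags, 0b0011)       // clear high bits -> 0b0011
	fmt.Printf("%04b\n", flags) // prints 0011
}

The intrinsified versions are functionally equivalent to this loop but lock-free at the instruction level, which is why the new ANDLlock/ORLlock ops are marked hasSideEffects and clobberFlags, exactly like the existing ANDBlock/ORBlock.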