From 517a44d57e6a0834e98b763045fab9be8f4673d0 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Wed, 19 Oct 2016 16:41:01 -0400 Subject: [PATCH] cmd/compile: intrinsify atomic operations on s390x Implements the following intrinsics on s390x: - AtomicAdd{32,64} - AtomicCompareAndSwap{32,64} - AtomicExchange{32,64} - AtomicLoad{32,64,Ptr} - AtomicStore{32,64,PtrNoWB} I haven't added rules for And8 or Or8 yet. Change-Id: I647af023a8e513718e90e98a60191e7af6167314 Reviewed-on: https://go-review.googlesource.com/31614 Run-TryBot: Michael Munday TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/cmd/compile/internal/gc/ssa.go | 24 +- src/cmd/compile/internal/s390x/prog.go | 6 + src/cmd/compile/internal/s390x/ssa.go | 91 ++++++ src/cmd/compile/internal/ssa/gen/S390X.rules | 26 ++ src/cmd/compile/internal/ssa/gen/S390XOps.go | 58 ++++ src/cmd/compile/internal/ssa/opGen.go | 188 +++++++++++ src/cmd/compile/internal/ssa/rewriteS390X.go | 308 +++++++++++++++++++ 7 files changed, 689 insertions(+), 12 deletions(-) diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 45070c8d31..4186decbd3 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -2566,63 +2566,63 @@ func intrinsicInit() { v := s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT32], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Load64"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT64], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), 
intrinsicKey{"runtime/internal/atomic", "Loadp"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, ptrto(Types[TUINT8]), v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Store"}: enableOnArch(func(s *state, n *Node) *ssa.Value { s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) return nil - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Store64"}: enableOnArch(func(s *state, n *Node) *ssa.Value { s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) return nil - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "StorepNoWB"}: enableOnArch(func(s *state, n *Node) *ssa.Value { s.vars[&memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) return nil - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Xchg"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue3(ssa.OpAtomicExchange32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT32], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Xchg64"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue3(ssa.OpAtomicExchange64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) s.vars[&memVar] = 
s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT64], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Xadd"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue3(ssa.OpAtomicAdd32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT32], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Xadd64"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue3(ssa.OpAtomicAdd64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TUINT64], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Cas"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue4(ssa.OpAtomicCompareAndSwap32, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TBOOL], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "Cas64"}: enableOnArch(func(s *state, n *Node) *ssa.Value { v := s.newValue4(ssa.OpAtomicCompareAndSwap64, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem()) s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v) return s.newValue1(ssa.OpSelect0, Types[TBOOL], v) - }, sys.AMD64, sys.ARM64), + }, sys.AMD64, sys.ARM64, sys.S390X), intrinsicKey{"runtime/internal/atomic", "And8"}: enableOnArch(func(s *state, n *Node) *ssa.Value { s.vars[&memVar] = 
s.newValue3(ssa.OpAtomicAnd8, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem()) diff --git a/src/cmd/compile/internal/s390x/prog.go b/src/cmd/compile/internal/s390x/prog.go index 9e974e9125..f356617d00 100644 --- a/src/cmd/compile/internal/s390x/prog.go +++ b/src/cmd/compile/internal/s390x/prog.go @@ -158,6 +158,12 @@ var progtable = [s390x.ALAST & obj.AMask]gc.ProgInfo{ s390x.ACMPUBGT & obj.AMask: {Flags: gc.Cjmp}, s390x.ACMPUBLE & obj.AMask: {Flags: gc.Cjmp}, + // Atomic + s390x.ACS & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite}, + s390x.ACSG & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite}, + s390x.ALAA & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.RightWrite}, + s390x.ALAAG & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.RightWrite}, + // Macros s390x.ACLEAR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightAddr | gc.RightWrite}, diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index eb1975abf0..e8b7b4ba6e 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -546,6 +546,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Reg = v.Reg() case ssa.OpSP, ssa.OpSB: // nothing to do + case ssa.OpSelect0, ssa.OpSelect1: + // nothing to do case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: @@ -558,6 +560,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) + case ssa.OpS390XAddTupleFirst32, ssa.OpS390XAddTupleFirst64: + v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString()) case ssa.OpS390XLoweredNilCheck: // Issue a load which will fault if the input is nil. 
p := gc.Prog(s390x.AMOVBZ) @@ -686,6 +690,93 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { clear.To.Type = obj.TYPE_MEM clear.To.Reg = v.Args[0].Reg() } + case ssa.OpS390XMOVWZatomicload, ssa.OpS390XMOVDatomicload: + p := gc.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[0].Reg() + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg0() + case ssa.OpS390XMOVWatomicstore, ssa.OpS390XMOVDatomicstore: + p := gc.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + gc.AddAux(&p.To, v) + case ssa.OpS390XLAA, ssa.OpS390XLAAG: + p := gc.Prog(v.Op.Asm()) + p.Reg = v.Reg0() + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + p.To.Type = obj.TYPE_MEM + p.To.Reg = v.Args[0].Reg() + gc.AddAux(&p.To, v) + case ssa.OpS390XLoweredAtomicCas32, ssa.OpS390XLoweredAtomicCas64: + // Convert the flags output of CS{,G} into a bool. + // CS{,G} arg1, arg2, arg0 + // MOVD $0, ret + // BNE 2(PC) + // MOVD $1, ret + // NOP (so the BNE has somewhere to land) + + // CS{,G} arg1, arg2, arg0 + cs := gc.Prog(v.Op.Asm()) + cs.From.Type = obj.TYPE_REG + cs.From.Reg = v.Args[1].Reg() // old + cs.Reg = v.Args[2].Reg() // new + cs.To.Type = obj.TYPE_MEM + cs.To.Reg = v.Args[0].Reg() + gc.AddAux(&cs.To, v) + + // MOVD $0, ret + movd := gc.Prog(s390x.AMOVD) + movd.From.Type = obj.TYPE_CONST + movd.From.Offset = 0 + movd.To.Type = obj.TYPE_REG + movd.To.Reg = v.Reg0() + + // BNE 2(PC) + bne := gc.Prog(s390x.ABNE) + bne.To.Type = obj.TYPE_BRANCH + + // MOVD $1, ret + movd = gc.Prog(s390x.AMOVD) + movd.From.Type = obj.TYPE_CONST + movd.From.Offset = 1 + movd.To.Type = obj.TYPE_REG + movd.To.Reg = v.Reg0() + + // NOP (so the BNE has somewhere to land) + nop := gc.Prog(obj.ANOP) + gc.Patch(bne, nop) + case ssa.OpS390XLoweredAtomicExchange32, ssa.OpS390XLoweredAtomicExchange64: + // Loop until the CS{,G} succeeds. 
+ // MOV{WZ,D} arg0, ret + // cs: CS{,G} ret, arg1, arg0 + // BNE cs + + // MOV{WZ,D} arg0, ret + load := gc.Prog(loadByType(v.Type.FieldType(0))) + load.From.Type = obj.TYPE_MEM + load.From.Reg = v.Args[0].Reg() + load.To.Type = obj.TYPE_REG + load.To.Reg = v.Reg0() + gc.AddAux(&load.From, v) + + // CS{,G} ret, arg1, arg0 + cs := gc.Prog(v.Op.Asm()) + cs.From.Type = obj.TYPE_REG + cs.From.Reg = v.Reg0() // old + cs.Reg = v.Args[1].Reg() // new + cs.To.Type = obj.TYPE_MEM + cs.To.Reg = v.Args[0].Reg() + gc.AddAux(&cs.To, v) + + // BNE cs + bne := gc.Prog(s390x.ABNE) + bne.To.Type = obj.TYPE_BRANCH + gc.Patch(bne, cs) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules index fa628d4c8b..2dfc21e8a5 100644 --- a/src/cmd/compile/internal/ssa/gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/gen/S390X.rules @@ -110,6 +110,32 @@ (Sqrt x) -> (FSQRT x) +// Atomic loads. +(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem) +(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem) +(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem) + +// Atomic stores. +(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem) +(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem) +(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem) + +// Atomic adds. +(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 (LAA ptr val mem) val) +(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 (LAAG ptr val mem) val) +(Select0 (AddTupleFirst32 tuple val)) -> (ADDW val (Select0 tuple)) +(Select1 (AddTupleFirst32 tuple _ )) -> (Select1 tuple) +(Select0 (AddTupleFirst64 tuple val)) -> (ADD val (Select0 tuple)) +(Select1 (AddTupleFirst64 tuple _ )) -> (Select1 tuple) + +// Atomic exchanges. +(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem) +(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem) + +// Atomic compare and swap. 
+(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem) +(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem) + // Lowering extension // Note: we always extend to 64 bits even though some ops don't need that many result bits. (SignExt8to16 x) -> (MOVBreg x) diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go index 7f1f5f928f..7a25c26784 100644 --- a/src/cmd/compile/internal/ssa/gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go @@ -135,6 +135,7 @@ func init() { gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}} gpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}} gpstorebr = regInfo{inputs: []regMask{ptrsp, gpsp, 0}} + gpstorelaa = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly} gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}} @@ -152,6 +153,15 @@ func init() { fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}} fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}} + + // LoweredAtomicCas may overwrite arg1, so force it to R0 for now. + cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0} + + // LoweredAtomicExchange overwrites the output before executing + // CS{,G}, so the output register must not be the same as the + // input register. For now we just force the output register to + // R0. + exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}} ) var S390Xops = []opData{ @@ -408,6 +418,54 @@ func init() { {name: "FlagLT"}, // < {name: "FlagGT"}, // > + // Atomic loads. These are just normal loads but return tuples + // so they can be properly ordered with other loads. + // load from arg0+auxint+aux. arg1=mem. 
+ {name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true}, + {name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true}, + + // Atomic stores. These are just normal stores. + // store arg1 to arg0+auxint+aux. arg2=mem. + {name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, + {name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true}, + + // Atomic adds. + // *(arg0+auxint+aux) += arg1. arg2=mem. + // Returns a tuple of <old contents of *(arg0+auxint+aux), memory>. + {name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", faultOnNilArg0: true}, + {name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", faultOnNilArg0: true}, + {name: "AddTupleFirst32", argLength: 2}, // arg0=tuple <x,y>. Returns <x+arg1,y>. + {name: "AddTupleFirst64", argLength: 2}, // arg0=tuple <x,y>. Returns <x+arg1,y>. + + // Compare and swap. + // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. + // if *(arg0+auxint+aux) == arg1 { + // *(arg0+auxint+aux) = arg2 + // return (true, memory) + // } else { + // return (false, memory) + // } + // Note that these instructions also return the old value in arg1, but we ignore it. + // TODO: have these return flags instead of bool. The current system generates: + // CS ... + // MOVD $0, ret + // BNE 2(PC) + // MOVD $1, ret + // CMPW ret, $0 + // BNE ... + // instead of just + // CS ... + // BEQ ... + // but we can't do that because memory-using ops can't generate flags yet + // (flagalloc wants to move flag-generating instructions around). 
+ {name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true}, + {name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true}, + + // Lowered atomic swaps, emulated using compare-and-swap. + // store arg1 to arg0+auxint+aux, arg2=mem. + {name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true}, + {name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true}, + // find leftmost one { name: "FLOGR", diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 3c2714063f..b740f2a68a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1417,6 +1417,18 @@ const ( OpS390XFlagEQ OpS390XFlagLT OpS390XFlagGT + OpS390XMOVWZatomicload + OpS390XMOVDatomicload + OpS390XMOVWatomicstore + OpS390XMOVDatomicstore + OpS390XLAA + OpS390XLAAG + OpS390XAddTupleFirst32 + OpS390XAddTupleFirst64 + OpS390XLoweredAtomicCas32 + OpS390XLoweredAtomicCas64 + OpS390XLoweredAtomicExchange32 + OpS390XLoweredAtomicExchange64 OpS390XFLOGR OpS390XSTMG2 OpS390XSTMG3 @@ -17933,6 +17945,182 @@ var opcodeTable = [...]opInfo{ argLen: 0, reg: regInfo{}, }, + { + name: "MOVWZatomicload", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + asm: s390x.AMOVWZ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + }, + outputs: []outputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, + { + name: "MOVDatomicload", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + asm: s390x.AMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + }, + outputs: []outputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + 
}, + { + name: "MOVWatomicstore", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + asm: s390x.AMOVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + {1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + }, + }, + { + name: "MOVDatomicstore", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + asm: s390x.AMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + {1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + }, + }, + { + name: "LAA", + auxType: auxSymOff, + argLen: 3, + faultOnNilArg0: true, + asm: s390x.ALAA, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + {1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + outputs: []outputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, + { + name: "LAAG", + auxType: auxSymOff, + argLen: 3, + faultOnNilArg0: true, + asm: s390x.ALAAG, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB + {1, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + outputs: []outputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, + { + name: "AddTupleFirst32", + argLen: 2, + reg: regInfo{}, + }, + { + name: "AddTupleFirst64", + argLen: 2, + reg: regInfo{}, + }, + { + name: "LoweredAtomicCas32", + auxType: auxSymOff, + argLen: 4, + clobberFlags: true, + faultOnNilArg0: true, + asm: s390x.ACS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1}, // R0 + {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + {2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + clobbers: 1, // R0 + outputs: []outputInfo{ + {1, 0}, + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, + { + name: "LoweredAtomicCas64", + auxType: auxSymOff, + argLen: 4, + clobberFlags: true, + 
faultOnNilArg0: true, + asm: s390x.ACSG, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1}, // R0 + {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + {2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + clobbers: 1, // R0 + outputs: []outputInfo{ + {1, 0}, + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, + { + name: "LoweredAtomicExchange32", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + asm: s390x.ACS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + {1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + outputs: []outputInfo{ + {1, 0}, + {0, 1}, // R0 + }, + }, + }, + { + name: "LoweredAtomicExchange64", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + asm: s390x.ACSG, + reg: regInfo{ + inputs: []inputInfo{ + {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + {1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP + }, + outputs: []outputInfo{ + {1, 0}, + {0, 1}, // R0 + }, + }, + }, { name: "FLOGR", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index 004ff45f3c..4027fa7bcf 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -34,6 +34,30 @@ func rewriteValueS390X(v *Value, config *Config) bool { return rewriteValueS390X_OpAnd8(v, config) case OpAndB: return rewriteValueS390X_OpAndB(v, config) + case OpAtomicAdd32: + return rewriteValueS390X_OpAtomicAdd32(v, config) + case OpAtomicAdd64: + return rewriteValueS390X_OpAtomicAdd64(v, config) + case OpAtomicCompareAndSwap32: + return rewriteValueS390X_OpAtomicCompareAndSwap32(v, config) + case OpAtomicCompareAndSwap64: + return rewriteValueS390X_OpAtomicCompareAndSwap64(v, config) + case OpAtomicExchange32: + return rewriteValueS390X_OpAtomicExchange32(v, config) + case OpAtomicExchange64: + return 
rewriteValueS390X_OpAtomicExchange64(v, config) + case OpAtomicLoad32: + return rewriteValueS390X_OpAtomicLoad32(v, config) + case OpAtomicLoad64: + return rewriteValueS390X_OpAtomicLoad64(v, config) + case OpAtomicLoadPtr: + return rewriteValueS390X_OpAtomicLoadPtr(v, config) + case OpAtomicStore32: + return rewriteValueS390X_OpAtomicStore32(v, config) + case OpAtomicStore64: + return rewriteValueS390X_OpAtomicStore64(v, config) + case OpAtomicStorePtrNoWB: + return rewriteValueS390X_OpAtomicStorePtrNoWB(v, config) case OpAvg64u: return rewriteValueS390X_OpAvg64u(v, config) case OpBswap32: @@ -612,6 +636,10 @@ func rewriteValueS390X(v *Value, config *Config) bool { return rewriteValueS390X_OpS390XXORWconst(v, config) case OpS390XXORconst: return rewriteValueS390X_OpS390XXORconst(v, config) + case OpSelect0: + return rewriteValueS390X_OpSelect0(v, config) + case OpSelect1: + return rewriteValueS390X_OpSelect1(v, config) case OpSignExt16to32: return rewriteValueS390X_OpSignExt16to32(v, config) case OpSignExt16to64: @@ -876,6 +904,214 @@ func rewriteValueS390X_OpAndB(v *Value, config *Config) bool { return true } } +func rewriteValueS390X_OpAtomicAdd32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicAdd32 ptr val mem) + // cond: + // result: (AddTupleFirst32 (LAA ptr val mem) val) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XAddTupleFirst32) + v0 := b.NewValue0(v.Line, OpS390XLAA, MakeTuple(config.fe.TypeUInt32(), TypeMem)) + v0.AddArg(ptr) + v0.AddArg(val) + v0.AddArg(mem) + v.AddArg(v0) + v.AddArg(val) + return true + } +} +func rewriteValueS390X_OpAtomicAdd64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicAdd64 ptr val mem) + // cond: + // result: (AddTupleFirst64 (LAAG ptr val mem) val) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XAddTupleFirst64) + v0 := b.NewValue0(v.Line, OpS390XLAAG, MakeTuple(config.fe.TypeUInt64(), TypeMem)) + 
v0.AddArg(ptr) + v0.AddArg(val) + v0.AddArg(mem) + v.AddArg(v0) + v.AddArg(val) + return true + } +} +func rewriteValueS390X_OpAtomicCompareAndSwap32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicCompareAndSwap32 ptr old new_ mem) + // cond: + // result: (LoweredAtomicCas32 ptr old new_ mem) + for { + ptr := v.Args[0] + old := v.Args[1] + new_ := v.Args[2] + mem := v.Args[3] + v.reset(OpS390XLoweredAtomicCas32) + v.AddArg(ptr) + v.AddArg(old) + v.AddArg(new_) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicCompareAndSwap64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicCompareAndSwap64 ptr old new_ mem) + // cond: + // result: (LoweredAtomicCas64 ptr old new_ mem) + for { + ptr := v.Args[0] + old := v.Args[1] + new_ := v.Args[2] + mem := v.Args[3] + v.reset(OpS390XLoweredAtomicCas64) + v.AddArg(ptr) + v.AddArg(old) + v.AddArg(new_) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicExchange32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicExchange32 ptr val mem) + // cond: + // result: (LoweredAtomicExchange32 ptr val mem) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XLoweredAtomicExchange32) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicExchange64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicExchange64 ptr val mem) + // cond: + // result: (LoweredAtomicExchange64 ptr val mem) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XLoweredAtomicExchange64) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicLoad32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicLoad32 ptr mem) + // cond: + // result: (MOVWZatomicload ptr mem) + for { + ptr := v.Args[0] + mem := v.Args[1] + v.reset(OpS390XMOVWZatomicload) + v.AddArg(ptr) + 
v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicLoad64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicLoad64 ptr mem) + // cond: + // result: (MOVDatomicload ptr mem) + for { + ptr := v.Args[0] + mem := v.Args[1] + v.reset(OpS390XMOVDatomicload) + v.AddArg(ptr) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicLoadPtr(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicLoadPtr ptr mem) + // cond: + // result: (MOVDatomicload ptr mem) + for { + ptr := v.Args[0] + mem := v.Args[1] + v.reset(OpS390XMOVDatomicload) + v.AddArg(ptr) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicStore32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicStore32 ptr val mem) + // cond: + // result: (MOVWatomicstore ptr val mem) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XMOVWatomicstore) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicStore64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicStore64 ptr val mem) + // cond: + // result: (MOVDatomicstore ptr val mem) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XMOVDatomicstore) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } +} +func rewriteValueS390X_OpAtomicStorePtrNoWB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AtomicStorePtrNoWB ptr val mem) + // cond: + // result: (MOVDatomicstore ptr val mem) + for { + ptr := v.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpS390XMOVDatomicstore) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } +} func rewriteValueS390X_OpAvg64u(v *Value, config *Config) bool { b := v.Block _ = b @@ -16764,6 +17000,78 @@ func rewriteValueS390X_OpS390XXORconst(v *Value, config *Config) bool { } return false } +func rewriteValueS390X_OpSelect0(v *Value, config 
*Config) bool { + b := v.Block + _ = b + // match: (Select0 (AddTupleFirst32 tuple val)) + // cond: + // result: (ADDW val (Select0 tuple)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XAddTupleFirst32 { + break + } + tuple := v_0.Args[0] + val := v_0.Args[1] + v.reset(OpS390XADDW) + v.AddArg(val) + v0 := b.NewValue0(v.Line, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg(v0) + return true + } + // match: (Select0 (AddTupleFirst64 tuple val)) + // cond: + // result: (ADD val (Select0 tuple)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XAddTupleFirst64 { + break + } + tuple := v_0.Args[0] + val := v_0.Args[1] + v.reset(OpS390XADD) + v.AddArg(val) + v0 := b.NewValue0(v.Line, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueS390X_OpSelect1(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Select1 (AddTupleFirst32 tuple _ )) + // cond: + // result: (Select1 tuple) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XAddTupleFirst32 { + break + } + tuple := v_0.Args[0] + v.reset(OpSelect1) + v.AddArg(tuple) + return true + } + // match: (Select1 (AddTupleFirst64 tuple _ )) + // cond: + // result: (Select1 tuple) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XAddTupleFirst64 { + break + } + tuple := v_0.Args[0] + v.reset(OpSelect1) + v.AddArg(tuple) + return true + } + return false +} func rewriteValueS390X_OpSignExt16to32(v *Value, config *Config) bool { b := v.Block _ = b -- 2.48.1