Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify atomic operations on s390x
author Michael Munday <munday@ca.ibm.com>
Wed, 19 Oct 2016 20:41:01 +0000 (16:41 -0400)
committer Michael Munday <munday@ca.ibm.com>
Tue, 25 Oct 2016 12:23:49 +0000 (12:23 +0000)
Implements the following intrinsics on s390x:
 - AtomicAdd{32,64}
 - AtomicCompareAndSwap{32,64}
 - AtomicExchange{32,64}
 - AtomicLoad{32,64,Ptr}
 - AtomicStore{32,64,PtrNoWB}

I haven't added rules for And8 or Or8 yet.

Change-Id: I647af023a8e513718e90e98a60191e7af6167314
Reviewed-on: https://go-review.googlesource.com/31614
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/s390x/prog.go
src/cmd/compile/internal/s390x/ssa.go
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/S390XOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteS390X.go

index 45070c8d310690f2d19413726da7dc6a54d279c1..4186decbd314a61f90f6728cc6fb4b66ba3f3e24 100644 (file)
@@ -2566,63 +2566,63 @@ func intrinsicInit() {
                        v := s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Load64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Loadp"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, ptrto(Types[TUINT8]), v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
 
                intrinsicKey{"runtime/internal/atomic", "Store"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        return nil
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Store64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        return nil
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "StorepNoWB"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        return nil
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
 
                intrinsicKey{"runtime/internal/atomic", "Xchg"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicExchange32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Xchg64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicExchange64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
 
                intrinsicKey{"runtime/internal/atomic", "Xadd"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicAdd32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Xadd64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue3(ssa.OpAtomicAdd64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
 
                intrinsicKey{"runtime/internal/atomic", "Cas"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue4(ssa.OpAtomicCompareAndSwap32, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
                intrinsicKey{"runtime/internal/atomic", "Cas64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        v := s.newValue4(ssa.OpAtomicCompareAndSwap64, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.intrinsicArg(n, 2), s.mem())
                        s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
                        return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-               }, sys.AMD64, sys.ARM64),
+               }, sys.AMD64, sys.ARM64, sys.S390X),
 
                intrinsicKey{"runtime/internal/atomic", "And8"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
index 9e974e9125b13bd1c8e25ac178feb1cd31cabc98..f356617d00b0905eaedea9d77fc547467c92cf68 100644 (file)
@@ -158,6 +158,12 @@ var progtable = [s390x.ALAST & obj.AMask]gc.ProgInfo{
        s390x.ACMPUBGT & obj.AMask: {Flags: gc.Cjmp},
        s390x.ACMPUBLE & obj.AMask: {Flags: gc.Cjmp},
 
+       // Atomic
+       s390x.ACS & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite},
+       s390x.ACSG & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.LeftWrite | gc.RegRead | gc.RightRead | gc.RightWrite},
+       s390x.ALAA & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.RightWrite},
+       s390x.ALAAG & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.RightWrite},
+
        // Macros
        s390x.ACLEAR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightAddr | gc.RightWrite},
 
index eb1975abf09dd8c01653a38c123715b01a943425..e8b7b4ba6e2186fb88d4c597e03dd875d241a9dc 100644 (file)
@@ -546,6 +546,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Reg = v.Reg()
        case ssa.OpSP, ssa.OpSB:
                // nothing to do
+       case ssa.OpSelect0, ssa.OpSelect1:
+               // nothing to do
        case ssa.OpVarDef:
                gc.Gvardef(v.Aux.(*gc.Node))
        case ssa.OpVarKill:
@@ -558,6 +560,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
        case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT:
                v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
+       case ssa.OpS390XAddTupleFirst32, ssa.OpS390XAddTupleFirst64:
+               v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
        case ssa.OpS390XLoweredNilCheck:
                // Issue a load which will fault if the input is nil.
                p := gc.Prog(s390x.AMOVBZ)
@@ -686,6 +690,93 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                        clear.To.Type = obj.TYPE_MEM
                        clear.To.Reg = v.Args[0].Reg()
                }
+       case ssa.OpS390XMOVWZatomicload, ssa.OpS390XMOVDatomicload:
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[0].Reg()
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg0()
+       case ssa.OpS390XMOVWatomicstore, ssa.OpS390XMOVDatomicstore:
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[1].Reg()
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = v.Args[0].Reg()
+               gc.AddAux(&p.To, v)
+       case ssa.OpS390XLAA, ssa.OpS390XLAAG:
+               p := gc.Prog(v.Op.Asm())
+               p.Reg = v.Reg0()
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[1].Reg()
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = v.Args[0].Reg()
+               gc.AddAux(&p.To, v)
+       case ssa.OpS390XLoweredAtomicCas32, ssa.OpS390XLoweredAtomicCas64:
+               // Convert the flags output of CS{,G} into a bool.
+               //    CS{,G} arg1, arg2, arg0
+               //    MOVD   $0, ret
+               //    BNE    2(PC)
+               //    MOVD   $1, ret
+               //    NOP (so the BNE has somewhere to land)
+
+               // CS{,G} arg1, arg2, arg0
+               cs := gc.Prog(v.Op.Asm())
+               cs.From.Type = obj.TYPE_REG
+               cs.From.Reg = v.Args[1].Reg() // old
+               cs.Reg = v.Args[2].Reg()      // new
+               cs.To.Type = obj.TYPE_MEM
+               cs.To.Reg = v.Args[0].Reg()
+               gc.AddAux(&cs.To, v)
+
+               // MOVD $0, ret
+               movd := gc.Prog(s390x.AMOVD)
+               movd.From.Type = obj.TYPE_CONST
+               movd.From.Offset = 0
+               movd.To.Type = obj.TYPE_REG
+               movd.To.Reg = v.Reg0()
+
+               // BNE 2(PC)
+               bne := gc.Prog(s390x.ABNE)
+               bne.To.Type = obj.TYPE_BRANCH
+
+               // MOVD $1, ret
+               movd = gc.Prog(s390x.AMOVD)
+               movd.From.Type = obj.TYPE_CONST
+               movd.From.Offset = 1
+               movd.To.Type = obj.TYPE_REG
+               movd.To.Reg = v.Reg0()
+
+               // NOP (so the BNE has somewhere to land)
+               nop := gc.Prog(obj.ANOP)
+               gc.Patch(bne, nop)
+       case ssa.OpS390XLoweredAtomicExchange32, ssa.OpS390XLoweredAtomicExchange64:
+               // Loop until the CS{,G} succeeds.
+               //     MOV{WZ,D} arg0, ret
+               // cs: CS{,G}    ret, arg1, arg0
+               //     BNE       cs
+
+               // MOV{WZ,D} arg0, ret
+               load := gc.Prog(loadByType(v.Type.FieldType(0)))
+               load.From.Type = obj.TYPE_MEM
+               load.From.Reg = v.Args[0].Reg()
+               load.To.Type = obj.TYPE_REG
+               load.To.Reg = v.Reg0()
+               gc.AddAux(&load.From, v)
+
+               // CS{,G} ret, arg1, arg0
+               cs := gc.Prog(v.Op.Asm())
+               cs.From.Type = obj.TYPE_REG
+               cs.From.Reg = v.Reg0()   // old
+               cs.Reg = v.Args[1].Reg() // new
+               cs.To.Type = obj.TYPE_MEM
+               cs.To.Reg = v.Args[0].Reg()
+               gc.AddAux(&cs.To, v)
+
+               // BNE cs
+               bne := gc.Prog(s390x.ABNE)
+               bne.To.Type = obj.TYPE_BRANCH
+               gc.Patch(bne, cs)
        default:
                v.Fatalf("genValue not implemented: %s", v.LongString())
        }
index fa628d4c8b4d11679626ab62f57f1ba8ed66c269..2dfc21e8a5b31940ad189ac8ff3b24a1a86aa1a2 100644 (file)
 
 (Sqrt x) -> (FSQRT x)
 
+// Atomic loads.
+(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
+(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
+(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
+
+// Atomic stores.
+(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
+(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
+(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
+
+// Atomic adds.
+(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 (LAA ptr val mem) val)
+(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 (LAAG ptr val mem) val)
+(Select0 <t> (AddTupleFirst32 tuple val)) -> (ADDW val (Select0 <t> tuple))
+(Select1     (AddTupleFirst32 tuple _  )) -> (Select1 tuple)
+(Select0 <t> (AddTupleFirst64 tuple val)) -> (ADD val (Select0 <t> tuple))
+(Select1     (AddTupleFirst64 tuple _  )) -> (Select1 tuple)
+
+// Atomic exchanges.
+(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
+(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
+
+// Atomic compare and swap.
+(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
+(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
+
 // Lowering extension
 // Note: we always extend to 64 bits even though some ops don't need that many result bits.
 (SignExt8to16  x) -> (MOVBreg x)
index 7f1f5f928f612e4c1b791aa620ee01f1be427715..7a25c267846b05a20887ddc6726de85d4f5860a3 100644 (file)
@@ -135,6 +135,7 @@ func init() {
                gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
                gpstoreidx   = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
                gpstorebr    = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
+               gpstorelaa   = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly}
 
                gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
 
@@ -152,6 +153,15 @@ func init() {
 
                fpstore    = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
                fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
+
+               // LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
+               cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
+
+               // LoweredAtomicExchange overwrites the output before executing
+               // CS{,G}, so the output register must not be the same as the
+               // input register. For now we just force the output register to
+               // R0.
+               exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}}
        )
 
        var S390Xops = []opData{
@@ -408,6 +418,54 @@ func init() {
                {name: "FlagLT"}, // <
                {name: "FlagGT"}, // >
 
+               // Atomic loads. These are just normal loads but return <value,memory> tuples
+               // so they can be properly ordered with other loads.
+               // load from arg0+auxint+aux.  arg1=mem.
+               {name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true},
+               {name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true},
+
+               // Atomic stores. These are just normal stores.
+               // store arg1 to arg0+auxint+aux. arg2=mem.
+               {name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true},
+               {name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true},
+
+               // Atomic adds.
+               // *(arg0+auxint+aux) += arg1.  arg2=mem.
+               // Returns a tuple of <old contents of *(arg0+auxint+aux), memory>.
+               {name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", faultOnNilArg0: true},
+               {name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", faultOnNilArg0: true},
+               {name: "AddTupleFirst32", argLength: 2}, // arg0=tuple <x,y>.  Returns <x+arg1,y>.
+               {name: "AddTupleFirst64", argLength: 2}, // arg0=tuple <x,y>.  Returns <x+arg1,y>.
+
+               // Compare and swap.
+               // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
+               // if *(arg0+auxint+aux) == arg1 {
+               //   *(arg0+auxint+aux) = arg2
+               //   return (true, memory)
+               // } else {
+               //   return (false, memory)
+               // }
+               // Note that these instructions also return the old value in arg1, but we ignore it.
+               // TODO: have these return flags instead of bool.  The current system generates:
+               //    CS ...
+               //    MOVD  $0, ret
+               //    BNE   2(PC)
+               //    MOVD  $1, ret
+               //    CMPW  ret, $0
+               //    BNE ...
+               // instead of just
+               //    CS ...
+               //    BEQ ...
+               // but we can't do that because memory-using ops can't generate flags yet
+               // (flagalloc wants to move flag-generating instructions around).
+               {name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
+               {name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
+
+               // Lowered atomic swaps, emulated using compare-and-swap.
+               // store arg1 to arg0+auxint+aux, arg2=mem.
+               {name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
+               {name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},
+
                // find leftmost one
                {
                        name:         "FLOGR",
index 3c2714063f7f119b2215f8fdcffce3ecf4ae03f3..b740f2a68ade91f23a85d02824a44d5ed33f73dc 100644 (file)
@@ -1417,6 +1417,18 @@ const (
        OpS390XFlagEQ
        OpS390XFlagLT
        OpS390XFlagGT
+       OpS390XMOVWZatomicload
+       OpS390XMOVDatomicload
+       OpS390XMOVWatomicstore
+       OpS390XMOVDatomicstore
+       OpS390XLAA
+       OpS390XLAAG
+       OpS390XAddTupleFirst32
+       OpS390XAddTupleFirst64
+       OpS390XLoweredAtomicCas32
+       OpS390XLoweredAtomicCas64
+       OpS390XLoweredAtomicExchange32
+       OpS390XLoweredAtomicExchange64
        OpS390XFLOGR
        OpS390XSTMG2
        OpS390XSTMG3
@@ -17933,6 +17945,182 @@ var opcodeTable = [...]opInfo{
                argLen: 0,
                reg:    regInfo{},
        },
+       {
+               name:           "MOVWZatomicload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               asm:            s390x.AMOVWZ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:           "MOVDatomicload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               asm:            s390x.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:           "MOVWatomicstore",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                               {1, 54271},      // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+               },
+       },
+       {
+               name:           "MOVDatomicstore",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                               {1, 54271},      // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+               },
+       },
+       {
+               name:           "LAA",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            s390x.ALAA,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                               {1, 54271},      // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       outputs: []outputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:           "LAAG",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            s390x.ALAAG,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295021566}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP SB
+                               {1, 54271},      // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       outputs: []outputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:   "AddTupleFirst32",
+               argLen: 2,
+               reg:    regInfo{},
+       },
+       {
+               name:   "AddTupleFirst64",
+               argLen: 2,
+               reg:    regInfo{},
+       },
+       {
+               name:           "LoweredAtomicCas32",
+               auxType:        auxSymOff,
+               argLen:         4,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.ACS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1},     // R0
+                               {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                               {2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       clobbers: 1, // R0
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:           "LoweredAtomicCas64",
+               auxType:        auxSymOff,
+               argLen:         4,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.ACSG,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1},     // R0
+                               {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                               {2, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       clobbers: 1, // R0
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
+       {
+               name:           "LoweredAtomicExchange32",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.ACS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                               {1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 1}, // R0
+                       },
+               },
+       },
+       {
+               name:           "LoweredAtomicExchange64",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               asm:            s390x.ACSG,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                               {1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 1}, // R0
+                       },
+               },
+       },
        {
                name:         "FLOGR",
                argLen:       1,
index 004ff45f3c438300f093044030601d218f2efe5a..4027fa7bcfbfd53c2b0cc9a977833bb3464db265 100644 (file)
@@ -34,6 +34,30 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpAnd8(v, config)
        case OpAndB:
                return rewriteValueS390X_OpAndB(v, config)
+       case OpAtomicAdd32:
+               return rewriteValueS390X_OpAtomicAdd32(v, config)
+       case OpAtomicAdd64:
+               return rewriteValueS390X_OpAtomicAdd64(v, config)
+       case OpAtomicCompareAndSwap32:
+               return rewriteValueS390X_OpAtomicCompareAndSwap32(v, config)
+       case OpAtomicCompareAndSwap64:
+               return rewriteValueS390X_OpAtomicCompareAndSwap64(v, config)
+       case OpAtomicExchange32:
+               return rewriteValueS390X_OpAtomicExchange32(v, config)
+       case OpAtomicExchange64:
+               return rewriteValueS390X_OpAtomicExchange64(v, config)
+       case OpAtomicLoad32:
+               return rewriteValueS390X_OpAtomicLoad32(v, config)
+       case OpAtomicLoad64:
+               return rewriteValueS390X_OpAtomicLoad64(v, config)
+       case OpAtomicLoadPtr:
+               return rewriteValueS390X_OpAtomicLoadPtr(v, config)
+       case OpAtomicStore32:
+               return rewriteValueS390X_OpAtomicStore32(v, config)
+       case OpAtomicStore64:
+               return rewriteValueS390X_OpAtomicStore64(v, config)
+       case OpAtomicStorePtrNoWB:
+               return rewriteValueS390X_OpAtomicStorePtrNoWB(v, config)
        case OpAvg64u:
                return rewriteValueS390X_OpAvg64u(v, config)
        case OpBswap32:
@@ -612,6 +636,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpS390XXORWconst(v, config)
        case OpS390XXORconst:
                return rewriteValueS390X_OpS390XXORconst(v, config)
+       case OpSelect0:
+               return rewriteValueS390X_OpSelect0(v, config)
+       case OpSelect1:
+               return rewriteValueS390X_OpSelect1(v, config)
        case OpSignExt16to32:
                return rewriteValueS390X_OpSignExt16to32(v, config)
        case OpSignExt16to64:
@@ -876,6 +904,214 @@ func rewriteValueS390X_OpAndB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueS390X_OpAtomicAdd32(v *Value, config *Config) bool { // Rewrites (AtomicAdd32 ptr val mem) in place as AddTupleFirst32 over a new LAA; always returns true.
+       b := v.Block // the new LAA value is created in v's own block
+       _ = b
+       // match: (AtomicAdd32 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst32 (LAA ptr val mem) val)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XAddTupleFirst32) // v becomes the outer AddTupleFirst32
+               v0 := b.NewValue0(v.Line, OpS390XLAA, MakeTuple(config.fe.TypeUInt32(), TypeMem)) // inner LAA, typed as a (uint32, mem) tuple and carrying v.Line
+               v0.AddArg(ptr)
+               v0.AddArg(val)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(val) // val is kept next to the tuple so the Select0 rule in this file can add it to the tuple's first element; presumably LAA yields the pre-add value (confirm against the S390X op definitions)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicAdd64(v *Value, config *Config) bool { // Rewrites (AtomicAdd64 ptr val mem) in place as AddTupleFirst64 over a new LAAG; always returns true.
+       b := v.Block // the new LAAG value is created in v's own block
+       _ = b
+       // match: (AtomicAdd64 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst64 (LAAG ptr val mem) val)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XAddTupleFirst64) // v becomes the outer AddTupleFirst64
+               v0 := b.NewValue0(v.Line, OpS390XLAAG, MakeTuple(config.fe.TypeUInt64(), TypeMem)) // inner LAAG, typed as a (uint64, mem) tuple and carrying v.Line
+               v0.AddArg(ptr)
+               v0.AddArg(val)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(val) // val is kept next to the tuple so the Select0 rule in this file can add it to the tuple's first element; presumably LAAG yields the pre-add value (confirm against the S390X op definitions)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicCompareAndSwap32(v *Value, config *Config) bool { // Rewrites AtomicCompareAndSwap32 in place as LoweredAtomicCas32; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
+       // cond:
+       // result: (LoweredAtomicCas32 ptr old new_ mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XLoweredAtomicCas32) // switch v to the S390X op, then re-attach the four operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicCompareAndSwap64(v *Value, config *Config) bool { // Rewrites AtomicCompareAndSwap64 in place as LoweredAtomicCas64; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
+       // cond:
+       // result: (LoweredAtomicCas64 ptr old new_ mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XLoweredAtomicCas64) // switch v to the S390X op, then re-attach the four operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicExchange32(v *Value, config *Config) bool { // Rewrites AtomicExchange32 in place as LoweredAtomicExchange32; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicExchange32 ptr val mem)
+       // cond:
+       // result: (LoweredAtomicExchange32 ptr val mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XLoweredAtomicExchange32) // switch v to the S390X op, then re-attach the three operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicExchange64(v *Value, config *Config) bool { // Rewrites AtomicExchange64 in place as LoweredAtomicExchange64; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicExchange64 ptr val mem)
+       // cond:
+       // result: (LoweredAtomicExchange64 ptr val mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XLoweredAtomicExchange64) // switch v to the S390X op, then re-attach the three operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicLoad32(v *Value, config *Config) bool { // Rewrites AtomicLoad32 in place as MOVWZatomicload; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicLoad32 ptr mem)
+       // cond:
+       // result: (MOVWZatomicload ptr mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpS390XMOVWZatomicload) // switch v to the S390X op, then re-attach ptr and mem in their original order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicLoad64(v *Value, config *Config) bool { // Rewrites AtomicLoad64 in place as MOVDatomicload; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicLoad64 ptr mem)
+       // cond:
+       // result: (MOVDatomicload ptr mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpS390XMOVDatomicload) // switch v to the S390X op, then re-attach ptr and mem in their original order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicLoadPtr(v *Value, config *Config) bool { // Rewrites AtomicLoadPtr in place as MOVDatomicload; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicLoadPtr ptr mem)
+       // cond:
+       // result: (MOVDatomicload ptr mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpS390XMOVDatomicload) // same target op as the AtomicLoad64 rule in this file
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicStore32(v *Value, config *Config) bool { // Rewrites AtomicStore32 in place as MOVWatomicstore; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicStore32 ptr val mem)
+       // cond:
+       // result: (MOVWatomicstore ptr val mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWatomicstore) // switch v to the S390X op, then re-attach the three operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicStore64(v *Value, config *Config) bool { // Rewrites AtomicStore64 in place as MOVDatomicstore; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicStore64 ptr val mem)
+       // cond:
+       // result: (MOVDatomicstore ptr val mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDatomicstore) // switch v to the S390X op, then re-attach the three operands in their original order
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueS390X_OpAtomicStorePtrNoWB(v *Value, config *Config) bool { // Rewrites AtomicStorePtrNoWB in place as MOVDatomicstore; always returns true.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond:
+       // result: (MOVDatomicstore ptr val mem)
+       for { // single pass: the body always ends in return
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDatomicstore) // same target op as the AtomicStore64 rule in this file
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+}
 func rewriteValueS390X_OpAvg64u(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -16764,6 +17000,78 @@ func rewriteValueS390X_OpS390XXORconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueS390X_OpSelect0(v *Value, config *Config) bool { // Tries the two Select0-of-AddTupleFirst rules in order; returns false when neither matches.
+       b := v.Block // the replacement Select0 is created in v's own block
+       _ = b
+       // match: (Select0 <t> (AddTupleFirst32 tuple val))
+       // cond:
+       // result: (ADDW val (Select0 <t> tuple))
+       for { // runs at most once; break falls through to the next rule
+               t := v.Type // v's original type, reused for the inner Select0
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XAddTupleFirst32 {
+                       break
+               }
+               tuple := v_0.Args[0]
+               val := v_0.Args[1]
+               v.reset(OpS390XADDW) // v becomes the 32-bit add
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Line, OpSelect0, t) // fresh Select0 that reads the underlying tuple directly
+               v0.AddArg(tuple)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Select0 <t> (AddTupleFirst64 tuple val))
+       // cond:
+       // result: (ADD val (Select0 <t> tuple))
+       for { // runs at most once; break falls through to the final return false
+               t := v.Type // v's original type, reused for the inner Select0
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XAddTupleFirst64 {
+                       break
+               }
+               tuple := v_0.Args[0]
+               val := v_0.Args[1]
+               v.reset(OpS390XADD) // v becomes the 64-bit add
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Line, OpSelect0, t) // fresh Select0 that reads the underlying tuple directly
+               v0.AddArg(tuple)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpSelect1(v *Value, config *Config) bool { // Makes Select1 of an AddTupleFirst32/64 read the wrapped tuple directly; returns false when neither rule matches.
+       b := v.Block
+       _ = b // b is not otherwise used here; the blank assignment avoids an unused-variable compile error
+       // match: (Select1     (AddTupleFirst32 tuple _  ))
+       // cond:
+       // result: (Select1 tuple)
+       for { // runs at most once; break falls through to the next rule
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XAddTupleFirst32 {
+                       break
+               }
+               tuple := v_0.Args[0] // the second argument of AddTupleFirst32 is ignored by this rule
+               v.reset(OpSelect1) // v stays a Select1; only its argument changes
+               v.AddArg(tuple)
+               return true
+       }
+       // match: (Select1     (AddTupleFirst64 tuple _  ))
+       // cond:
+       // result: (Select1 tuple)
+       for { // runs at most once; break falls through to the final return false
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XAddTupleFirst64 {
+                       break
+               }
+               tuple := v_0.Args[0] // the second argument of AddTupleFirst64 is ignored by this rule
+               v.reset(OpSelect1) // v stays a Select1; only its argument changes
+               v.AddArg(tuple)
+               return true
+       }
+       return false
+}
 func rewriteValueS390X_OpSignExt16to32(v *Value, config *Config) bool {
        b := v.Block
        _ = b