From 96cdacb9717271126eb60de3d8410c9cecd67b28 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Fri, 27 Oct 2017 09:45:45 -0400 Subject: [PATCH] cmd/asm, cmd/compile: optimize math.Abs and math.Copysign on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This change adds three new instructions: - LPDFR: load positive (math.Abs(x)) - LNDFR: load negative (-math.Abs(x)) - CPSDR: copy sign (math.Copysign(x, y)) By making use of GPR <-> FPR moves we can now compile math.Abs and math.Copysign to these instructions using SSA rules. This CL also adds new rules to merge address generation into combined load operations. This makes GPR <-> FPR move matching more reliable. name old time/op new time/op delta Copysign 1.85ns ± 0% 1.40ns ± 1% -24.65% (p=0.000 n=8+10) Abs 1.58ns ± 1% 0.73ns ± 1% -53.64% (p=0.000 n=10+10) The geo mean improvement for all math package benchmarks was 4.6%. Change-Id: I0cec35c5c1b3fb45243bf666b56b57faca981bc9 Reviewed-on: https://go-review.googlesource.com/73950 Run-TryBot: Michael Munday Reviewed-by: Keith Randall --- src/cmd/asm/internal/asm/testdata/s390x.s | 3 + src/cmd/compile/internal/gc/asm_test.go | 64 + src/cmd/compile/internal/s390x/ssa.go | 7 +- src/cmd/compile/internal/ssa/gen/S390X.rules | 81 +- src/cmd/compile/internal/ssa/gen/S390XOps.go | 5 + src/cmd/compile/internal/ssa/opGen.go | 71 + src/cmd/compile/internal/ssa/rewriteS390X.go | 1922 ++++++++++++++++-- src/cmd/internal/obj/s390x/a.out.go | 3 + src/cmd/internal/obj/s390x/anames.go | 3 + src/cmd/internal/obj/s390x/asmz.go | 10 + 10 files changed, 2035 insertions(+), 134 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s index 6cc129ccc5..269f8bd077 100644 --- a/src/cmd/asm/internal/asm/testdata/s390x.s +++ b/src/cmd/asm/internal/asm/testdata/s390x.s @@ -296,6 +296,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- FMADDS F1, F2, F3 // 
b30e3012 FMSUB F4, F5, F5 // b31f5045 FMSUBS F6, F6, F7 // b30f7066 + LPDFR F1, F2 // b3700021 + LNDFR F3, F4 // b3710043 + CPSDR F5, F6, F7 // b3725076 VL (R15), V1 // e710f0000006 VST V1, (R15) // e710f000000e diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go index 687a3a3240..4320628f63 100644 --- a/src/cmd/compile/internal/gc/asm_test.go +++ b/src/cmd/compile/internal/gc/asm_test.go @@ -1691,6 +1691,70 @@ var linuxS390XTests = []*asmTest{ pos: []string{"\tMOV(B|BZ|D)\t[$]1,"}, neg: []string{"\tCEBR\t", "\tMOV(B|BZ|D)\t[$]0,"}, }, + // math tests + { + fn: ` + func $(x float64) float64 { + return math.Abs(x) + } + `, + pos: []string{"\tLPDFR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, + { + fn: ` + func $(x float32) float32 { + return float32(math.Abs(float64(x))) + } + `, + pos: []string{"\tLPDFR\t"}, + neg: []string{"\tLDEBR\t", "\tLEDBR\t"}, // no float64 conversion + }, + { + fn: ` + func $(x float64) float64 { + return math.Float64frombits(math.Float64bits(x)|1<<63) + } + `, + pos: []string{"\tLNDFR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, + { + fn: ` + func $(x float64) float64 { + return -math.Abs(x) + } + `, + pos: []string{"\tLNDFR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, + { + fn: ` + func $(x, y float64) float64 { + return math.Copysign(x, y) + } + `, + pos: []string{"\tCPSDR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, + { + fn: ` + func $(x float64) float64 { + return math.Copysign(x, -1) + } + `, + pos: []string{"\tLNDFR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, + { + fn: ` + func $(x float64) float64 { + return math.Copysign(-1, x) + } + `, + pos: []string{"\tCPSDR\t"}, + neg: []string{"\tMOVD\t"}, // no integer loads/stores + }, } var linuxARMTests = []*asmTest{ diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index ad6699e563..a1ba376e13 100644 
--- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -214,6 +214,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { default: v.Fatalf("invalid FIDBR mask: %v", v.AuxInt) } + case ssa.OpS390XCPSDR: + p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) + p.Reg = v.Args[0].Reg() case ssa.OpS390XDIVD, ssa.OpS390XDIVW, ssa.OpS390XDIVDU, ssa.OpS390XDIVWU, ssa.OpS390XMODD, ssa.OpS390XMODW, @@ -432,10 +435,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg, ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg, + ssa.OpS390XLDGR, ssa.OpS390XLGDR, ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA, ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA, ssa.OpS390XLDEBR, ssa.OpS390XLEDBR, - ssa.OpS390XFNEG, ssa.OpS390XFNEGS: + ssa.OpS390XFNEG, ssa.OpS390XFNEGS, + ssa.OpS390XLPDFR, ssa.OpS390XLNDFR: opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg()) case ssa.OpS390XCLEAR: p := s.Prog(v.Op.Asm()) diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules index f31b1c3007..21bd728508 100644 --- a/src/cmd/compile/internal/ssa/gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/gen/S390X.rules @@ -688,10 +688,55 @@ (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx [off] {sym} ptr idx mem) // replace load from same location as preceding store with copy -(MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBZreg x) -(MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHZreg x) -(MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWZreg x) -(MOVDload [off] {sym} ptr (MOVDstore 
[off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x) +(MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x) +(MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x) +(MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x) +(MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x) +(MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x) +(MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x) +(MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x) +(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x) +(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x) +(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x +(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x + +// prefer FPR <-> GPR moves over combined load ops +(MULLDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR y)) +(ADDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD x (LGDR y)) +(SUBload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB x (LGDR y)) +(ORload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR x (LGDR y)) +(ANDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND x (LGDR y)) +(XORload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR x (LGDR y)) + +// detect attempts to set/clear the sign bit +// may need to be reworked when 
NIHH/OIHH are added +(SRDconst [1] (SLDconst [1] (LGDR x))) -> (LGDR (LPDFR x)) +(LDGR (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR x)) +(OR (MOVDconst [-1<<63]) (LGDR x)) -> (LGDR (LNDFR x)) +(LDGR (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR x)) + +// detect attempts to set the sign bit with load +(LDGR x:(ORload [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR (LDGR (MOVDload [off] {sym} ptr mem))) + +// detect copysign +(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) -> (LGDR (CPSDR y x)) +(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR (FMOVDconst [c]) x)) +(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y) +(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y) + +// absorb negations into set/clear sign bit +(FNEG (LPDFR x)) -> (LNDFR x) +(FNEG (LNDFR x)) -> (LPDFR x) +(FNEGS (LPDFR x)) -> (LNDFR x) +(FNEGS (LNDFR x)) -> (LPDFR x) + +// no need to convert float32 to float64 to set/clear sign bit +(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x) +(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x) + +// remove unnecessary FPR <-> GPR moves +(LDGR (LGDR x)) -> x +(LGDR (LDGR x)) -> (MOVDreg x) // Don't extend before storing (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) @@ -723,6 +768,20 @@ (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem) (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem) +(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem) +(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem) +(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem) 
+(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem) +(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem) +(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem) + +(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem) +(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem) +(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem) +(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem) +(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem) +(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem) + // Fold constants into stores. 
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB -> (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem) @@ -780,6 +839,20 @@ (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) +(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + +(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(XORload [o1] {s1} x 
(MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) +(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + // Cannot store constant to SB directly (no 'move relative long immediate' instructions). (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go index 372a87d487..84e4f2b629 100644 --- a/src/cmd/compile/internal/ssa/gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go @@ -205,6 +205,9 @@ func init() { {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0 {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0 + {name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"}, // fp64/fp32 clear sign bit + {name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 set sign bit + {name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0 // Round to integer, float64 only. 
// @@ -357,6 +360,8 @@ func init() { {name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint + {name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion) + {name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion) {name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32 {name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64 {name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index e65cb1e7d6..a18cf78e88 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1505,6 +1505,9 @@ const ( OpS390XFMADD OpS390XFMSUBS OpS390XFMSUB + OpS390XLPDFR + OpS390XLNDFR + OpS390XCPSDR OpS390XFIDBR OpS390XFMOVSload OpS390XFMOVDload @@ -1610,6 +1613,8 @@ const ( OpS390XMOVDreg OpS390XMOVDnop OpS390XMOVDconst + OpS390XLDGR + OpS390XLGDR OpS390XCFDBRA OpS390XCGDBRA OpS390XCFEBRA @@ -19385,6 +19390,46 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LPDFR", + argLen: 1, + asm: s390x.ALPDFR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "LNDFR", + argLen: 1, + asm: s390x.ALNDFR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "CPSDR", + argLen: 2, + asm: s390x.ACPSDR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 
F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, { name: "FIDBR", auxType: auxInt8, @@ -20950,6 +20995,32 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LDGR", + argLen: 1, + asm: s390x.ALDGR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "LGDR", + argLen: 1, + asm: s390x.ALGDR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, { name: "CFDBRA", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index a937da12eb..9237bfc4fc 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -453,16 +453,24 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XADDW_0(v) || rewriteValueS390X_OpS390XADDW_10(v) case OpS390XADDWconst: return rewriteValueS390X_OpS390XADDWconst_0(v) + case OpS390XADDWload: + return rewriteValueS390X_OpS390XADDWload_0(v) case OpS390XADDconst: return rewriteValueS390X_OpS390XADDconst_0(v) + case OpS390XADDload: + return rewriteValueS390X_OpS390XADDload_0(v) case OpS390XAND: return rewriteValueS390X_OpS390XAND_0(v) || rewriteValueS390X_OpS390XAND_10(v) case OpS390XANDW: return rewriteValueS390X_OpS390XANDW_0(v) || rewriteValueS390X_OpS390XANDW_10(v) case OpS390XANDWconst: return rewriteValueS390X_OpS390XANDWconst_0(v) + case OpS390XANDWload: + return rewriteValueS390X_OpS390XANDWload_0(v) case OpS390XANDconst: return rewriteValueS390X_OpS390XANDconst_0(v) + case OpS390XANDload: + return rewriteValueS390X_OpS390XANDload_0(v) case OpS390XCMP: return 
rewriteValueS390X_OpS390XCMP_0(v) case OpS390XCMPU: @@ -479,6 +487,8 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XCMPWconst_0(v) case OpS390XCMPconst: return rewriteValueS390X_OpS390XCMPconst_0(v) + case OpS390XCPSDR: + return rewriteValueS390X_OpS390XCPSDR_0(v) case OpS390XFADD: return rewriteValueS390X_OpS390XFADD_0(v) case OpS390XFADDS: @@ -499,10 +509,20 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XFMOVSstore_0(v) case OpS390XFMOVSstoreidx: return rewriteValueS390X_OpS390XFMOVSstoreidx_0(v) + case OpS390XFNEG: + return rewriteValueS390X_OpS390XFNEG_0(v) + case OpS390XFNEGS: + return rewriteValueS390X_OpS390XFNEGS_0(v) case OpS390XFSUB: return rewriteValueS390X_OpS390XFSUB_0(v) case OpS390XFSUBS: return rewriteValueS390X_OpS390XFSUBS_0(v) + case OpS390XLDGR: + return rewriteValueS390X_OpS390XLDGR_0(v) + case OpS390XLEDBR: + return rewriteValueS390X_OpS390XLEDBR_0(v) + case OpS390XLGDR: + return rewriteValueS390X_OpS390XLGDR_0(v) case OpS390XLoweredRound32F: return rewriteValueS390X_OpS390XLoweredRound32F_0(v) case OpS390XLoweredRound64F: @@ -595,10 +615,14 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XMULLD_0(v) case OpS390XMULLDconst: return rewriteValueS390X_OpS390XMULLDconst_0(v) + case OpS390XMULLDload: + return rewriteValueS390X_OpS390XMULLDload_0(v) case OpS390XMULLW: return rewriteValueS390X_OpS390XMULLW_0(v) case OpS390XMULLWconst: return rewriteValueS390X_OpS390XMULLWconst_0(v) + case OpS390XMULLWload: + return rewriteValueS390X_OpS390XMULLWload_0(v) case OpS390XNEG: return rewriteValueS390X_OpS390XNEG_0(v) case OpS390XNEGW: @@ -613,8 +637,12 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XORW_0(v) || rewriteValueS390X_OpS390XORW_10(v) || rewriteValueS390X_OpS390XORW_20(v) || rewriteValueS390X_OpS390XORW_30(v) || rewriteValueS390X_OpS390XORW_40(v) || rewriteValueS390X_OpS390XORW_50(v) || rewriteValueS390X_OpS390XORW_60(v) || 
rewriteValueS390X_OpS390XORW_70(v) || rewriteValueS390X_OpS390XORW_80(v) || rewriteValueS390X_OpS390XORW_90(v) case OpS390XORWconst: return rewriteValueS390X_OpS390XORWconst_0(v) + case OpS390XORWload: + return rewriteValueS390X_OpS390XORWload_0(v) case OpS390XORconst: return rewriteValueS390X_OpS390XORconst_0(v) + case OpS390XORload: + return rewriteValueS390X_OpS390XORload_0(v) case OpS390XSLD: return rewriteValueS390X_OpS390XSLD_0(v) case OpS390XSLW: @@ -629,6 +657,8 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XSRAWconst_0(v) case OpS390XSRD: return rewriteValueS390X_OpS390XSRD_0(v) + case OpS390XSRDconst: + return rewriteValueS390X_OpS390XSRDconst_0(v) case OpS390XSRW: return rewriteValueS390X_OpS390XSRW_0(v) case OpS390XSTM2: @@ -645,16 +675,24 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XSUBW_0(v) case OpS390XSUBWconst: return rewriteValueS390X_OpS390XSUBWconst_0(v) + case OpS390XSUBWload: + return rewriteValueS390X_OpS390XSUBWload_0(v) case OpS390XSUBconst: return rewriteValueS390X_OpS390XSUBconst_0(v) + case OpS390XSUBload: + return rewriteValueS390X_OpS390XSUBload_0(v) case OpS390XXOR: return rewriteValueS390X_OpS390XXOR_0(v) || rewriteValueS390X_OpS390XXOR_10(v) case OpS390XXORW: return rewriteValueS390X_OpS390XXORW_0(v) || rewriteValueS390X_OpS390XXORW_10(v) case OpS390XXORWconst: return rewriteValueS390X_OpS390XXORWconst_0(v) + case OpS390XXORWload: + return rewriteValueS390X_OpS390XXORWload_0(v) case OpS390XXORconst: return rewriteValueS390X_OpS390XXORconst_0(v) + case OpS390XXORload: + return rewriteValueS390X_OpS390XXORload_0(v) case OpSelect0: return rewriteValueS390X_OpSelect0_0(v) case OpSelect1: @@ -6633,6 +6671,62 @@ func rewriteValueS390X_OpS390XADDWconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XADDWload_0(v *Value) bool { + // match: (ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ADDWload 
[off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XADDWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XADDWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XADDconst_0(v *Value) bool { // match: (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) // cond: ((c+d)&1 == 0) && is32Bit(c+d) @@ -6751,6 +6845,97 @@ func rewriteValueS390X_OpS390XADDconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XADDload_0(v *Value) bool { + b := v.Block + _ = b + // match: (ADDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (ADD x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XADD) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + 
// match: (ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ADDload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XADDload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XADDload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XAND_0(v *Value) bool { // match: (AND x (MOVDconst [c])) // cond: is32Bit(c) && c < 0 @@ -7420,6 +7605,62 @@ func rewriteValueS390X_OpS390XANDWconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XANDWload_0(v *Value) bool { + // match: (ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ANDWload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XANDWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + 
return true + } + // match: (ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XANDWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XANDconst_0(v *Value) bool { // match: (ANDconst [c] (ANDconst [d] x)) // cond: @@ -7477,6 +7718,97 @@ func rewriteValueS390X_OpS390XANDconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XANDload_0(v *Value) bool { + b := v.Block + _ = b + // match: (ANDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (AND x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XAND) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ANDload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + 
v.reset(OpS390XANDload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XANDload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XCMP_0(v *Value) bool { b := v.Block _ = b @@ -7958,6 +8290,45 @@ func rewriteValueS390X_OpS390XCMPconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XCPSDR_0(v *Value) bool { + // match: (CPSDR y (FMOVDconst [c])) + // cond: c & -1<<63 == 0 + // result: (LPDFR y) + for { + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XFMOVDconst { + break + } + c := v_1.AuxInt + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLPDFR) + v.AddArg(y) + return true + } + // match: (CPSDR y (FMOVDconst [c])) + // cond: c & -1<<63 != 0 + // result: (LNDFR y) + for { + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XFMOVDconst { + break + } + c := v_1.AuxInt + if !(c&-1<<63 != 0) { + break + } + v.reset(OpS390XLNDFR) + v.AddArg(y) + return true + } + return false +} func rewriteValueS390X_OpS390XFADD_0(v *Value) bool { // match: (FADD (FMUL y z) x) // cond: @@ -8041,6 +8412,63 @@ func rewriteValueS390X_OpS390XFADDS_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XFMOVDload_0(v *Value) bool { + // match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (LDGR x) + for { 
+ off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XLDGR) + v.AddArg(x) + return true + } + // match: (FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: x + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XFMOVDstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } // match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (FMOVDload [off1+off2] {sym} ptr mem) @@ -8363,6 +8791,35 @@ func rewriteValueS390X_OpS390XFMOVDstoreidx_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XFMOVSload_0(v *Value) bool { + // match: (FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: x + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XFMOVSstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } // match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (FMOVSload [off1+off2] {sym} ptr mem) @@ -8684,6 +9141,64 @@ func rewriteValueS390X_OpS390XFMOVSstoreidx_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XFNEG_0(v *Value) bool { + // match: (FNEG (LPDFR 
x)) + // cond: + // result: (LNDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLPDFR { + break + } + x := v_0.Args[0] + v.reset(OpS390XLNDFR) + v.AddArg(x) + return true + } + // match: (FNEG (LNDFR x)) + // cond: + // result: (LPDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLNDFR { + break + } + x := v_0.Args[0] + v.reset(OpS390XLPDFR) + v.AddArg(x) + return true + } + return false +} +func rewriteValueS390X_OpS390XFNEGS_0(v *Value) bool { + // match: (FNEGS (LPDFR x)) + // cond: + // result: (LNDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLPDFR { + break + } + x := v_0.Args[0] + v.reset(OpS390XLNDFR) + v.AddArg(x) + return true + } + // match: (FNEGS (LNDFR x)) + // cond: + // result: (LPDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLNDFR { + break + } + x := v_0.Args[0] + v.reset(OpS390XLPDFR) + v.AddArg(x) + return true + } + return false +} func rewriteValueS390X_OpS390XFSUB_0(v *Value) bool { // match: (FSUB (FMUL y z) x) // cond: @@ -8728,6 +9243,191 @@ func rewriteValueS390X_OpS390XFSUBS_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XLDGR_0(v *Value) bool { + b := v.Block + _ = b + // match: (LDGR (SRDconst [1] (SLDconst [1] x))) + // cond: + // result: (LPDFR (LDGR x)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XSRDconst { + break + } + if v_0.AuxInt != 1 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XSLDconst { + break + } + if v_0_0.AuxInt != 1 { + break + } + x := v_0_0.Args[0] + v.reset(OpS390XLPDFR) + v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (LDGR (OR (MOVDconst [-1<<63]) x)) + // cond: + // result: (LNDFR (LDGR x)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XOR { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XMOVDconst { + break + } + if v_0_0.AuxInt != -1<<63 { + break + } + x := v_0.Args[1] + v.reset(OpS390XLNDFR) + v0 := b.NewValue0(v.Pos, 
OpS390XLDGR, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (LDGR (OR x (MOVDconst [-1<<63]))) + // cond: + // result: (LNDFR (LDGR x)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XOR { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XMOVDconst { + break + } + if v_0_1.AuxInt != -1<<63 { + break + } + v.reset(OpS390XLNDFR) + v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (LDGR x:(ORload [off] {sym} (MOVDconst [-1<<63]) ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (LNDFR (LDGR (MOVDload [off] {sym} ptr mem))) + for { + t := v.Type + x := v.Args[0] + if x.Op != OpS390XORload { + break + } + t1 := x.Type + off := x.AuxInt + sym := x.Aux + _ = x.Args[2] + x_0 := x.Args[0] + if x_0.Op != OpS390XMOVDconst { + break + } + if x_0.AuxInt != -1<<63 { + break + } + ptr := x.Args[1] + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, OpS390XLNDFR, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpS390XLDGR, t) + v2 := b.NewValue0(v.Pos, OpS390XMOVDload, t1) + v2.AuxInt = off + v2.Aux = sym + v2.AddArg(ptr) + v2.AddArg(mem) + v1.AddArg(v2) + v0.AddArg(v1) + return true + } + // match: (LDGR (LGDR x)) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { + break + } + x := v_0.Args[0] + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} +func rewriteValueS390X_OpS390XLEDBR_0(v *Value) bool { + // match: (LEDBR (LPDFR (LDEBR x))) + // cond: + // result: (LPDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLPDFR { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLDEBR { + break + } + x := v_0_0.Args[0] + v.reset(OpS390XLPDFR) + v.AddArg(x) + return true + } + // match: (LEDBR (LNDFR (LDEBR x))) + // cond: + // result: (LNDFR x) + for { + v_0 := v.Args[0] + if v_0.Op != 
OpS390XLNDFR { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLDEBR { + break + } + x := v_0_0.Args[0] + v.reset(OpS390XLNDFR) + v.AddArg(x) + return true + } + return false +} +func rewriteValueS390X_OpS390XLGDR_0(v *Value) bool { + // match: (LGDR (LDGR x)) + // cond: + // result: (MOVDreg x) + for { + v_0 := v.Args[0] + if v_0.Op != OpS390XLDGR { + break + } + x := v_0.Args[0] + v.reset(OpS390XMOVDreg) + v.AddArg(x) + return true + } + return false +} func rewriteValueS390X_OpS390XLoweredRound32F_0(v *Value) bool { // match: (LoweredRound32F x:(FMOVSconst)) // cond: @@ -8761,24 +9461,28 @@ func rewriteValueS390X_OpS390XLoweredRound64F_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVBZload_0(v *Value) bool { - // match: (MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // match: (MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) // result: (MOVBZreg x) for { off := v.AuxInt sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] + ptr1 := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpS390XMOVBstore { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } _ = v_1.Args[2] ptr2 := v_1.Args[0] x := v_1.Args[1] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + if !(isSamePtr(ptr1, ptr2)) { break } v.reset(OpS390XMOVBZreg) @@ -9310,6 +10014,34 @@ func rewriteValueS390X_OpS390XMOVBZreg_10(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVBload_0(v *Value) bool { + // match: (MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (MOVBreg x) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVBstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := 
v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XMOVBreg) + v.AddArg(x) + return true + } // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (MOVBload [off1+off2] {sym} ptr mem) @@ -12384,30 +13116,62 @@ func rewriteValueS390X_OpS390XMOVDaddridx_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVDload_0(v *Value) bool { - // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // match: (MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) // result: (MOVDreg x) for { off := v.AuxInt sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] + ptr1 := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpS390XMOVDstore { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } _ = v_1.Args[2] ptr2 := v_1.Args[0] x := v_1.Args[1] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + if !(isSamePtr(ptr1, ptr2)) { break } v.reset(OpS390XMOVDreg) v.AddArg(x) return true } + // match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (LGDR x) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XFMOVDstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XLGDR) + v.AddArg(x) + return true + } // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (MOVDload [off1+off2] {sym} ptr mem) @@ -14756,24 +15520,28 @@ func rewriteValueS390X_OpS390XMOVHBRstoreidx_10(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVHZload_0(v *Value) bool { - // match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} 
ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // match: (MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) // result: (MOVHZreg x) for { off := v.AuxInt sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] + ptr1 := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpS390XMOVHstore { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } _ = v_1.Args[2] ptr2 := v_1.Args[0] x := v_1.Args[1] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + if !(isSamePtr(ptr1, ptr2)) { break } v.reset(OpS390XMOVHZreg) @@ -15120,6 +15888,34 @@ func rewriteValueS390X_OpS390XMOVHZreg_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVHload_0(v *Value) bool { + // match: (MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (MOVHreg x) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVHstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XMOVHreg) + v.AddArg(x) + return true + } // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (MOVHload [off1+off2] {sym} ptr mem) @@ -17241,24 +18037,28 @@ func rewriteValueS390X_OpS390XMOVWBRstoreidx_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVWZload_0(v *Value) bool { - // match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // match: (MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) // result: (MOVWZreg x) for { off := v.AuxInt sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] + ptr1 := v.Args[0] v_1 := v.Args[1] if v_1.Op != 
OpS390XMOVWstore { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } _ = v_1.Args[2] ptr2 := v_1.Args[0] x := v_1.Args[1] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + if !(isSamePtr(ptr1, ptr2)) { break } v.reset(OpS390XMOVWZreg) @@ -17630,6 +18430,34 @@ func rewriteValueS390X_OpS390XMOVWZreg_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XMOVWload_0(v *Value) bool { + // match: (MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (MOVWreg x) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr1 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVWstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + ptr2 := v_1.Args[0] + x := v_1.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XMOVWreg) + v.AddArg(x) + return true + } // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is20Bit(off1+off2) // result: (MOVWload [off1+off2] {sym} ptr mem) @@ -19140,6 +19968,97 @@ func rewriteValueS390X_OpS390XMULLDconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XMULLDload_0(v *Value) bool { + b := v.Block + _ = b + // match: (MULLDload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (MULLD x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XMULLD) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op 
!= OpSB && is20Bit(off1+off2) + // result: (MULLDload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XMULLDload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XMULLDload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XMULLW_0(v *Value) bool { // match: (MULLW x (MOVDconst [c])) // cond: @@ -19502,6 +20421,62 @@ func rewriteValueS390X_OpS390XMULLWconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XMULLWload_0(v *Value) bool { + // match: (MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (MULLWload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XMULLWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + 
// cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XMULLWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XNEG_0(v *Value) bool { // match: (NEG (MOVDconst [c])) // cond: @@ -19595,6 +20570,8 @@ func rewriteValueS390X_OpS390XNOTW_0(v *Value) bool { return false } func rewriteValueS390X_OpS390XOR_0(v *Value) bool { + b := v.Block + _ = b // match: (OR x (MOVDconst [c])) // cond: isU32Bit(c) // result: (ORconst [c] x) @@ -19687,6 +20664,227 @@ func rewriteValueS390X_OpS390XOR_0(v *Value) bool { v.AddArg(x) return true } + // match: (OR (MOVDconst [-1<<63]) (LGDR x)) + // cond: + // result: (LGDR (LNDFR x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + if v_0.AuxInt != -1<<63 { + break + } + v_1 := v.Args[1] + if v_1.Op != OpS390XLGDR { + break + } + t := v_1.Type + x := v_1.Args[0] + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLNDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (LGDR x) (MOVDconst [-1<<63])) + // cond: + // result: (LGDR (LNDFR x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { + break + } + t := v_0.Type + x := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + if v_1.AuxInt != -1<<63 { + break + } + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLNDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) + // cond: + 
// result: (LGDR (CPSDR y x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XSLDconst { + break + } + if v_0.AuxInt != 63 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XSRDconst { + break + } + if v_0_0.AuxInt != 63 { + break + } + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpS390XLGDR { + break + } + x := v_0_0_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XLGDR { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XLPDFR { + break + } + t := v_1_0.Type + y := v_1_0.Args[0] + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (LGDR (LPDFR y)) (SLDconst [63] (SRDconst [63] (LGDR x)))) + // cond: + // result: (LGDR (CPSDR y x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLPDFR { + break + } + t := v_0_0.Type + y := v_0_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XSLDconst { + break + } + if v_1.AuxInt != 63 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XSRDconst { + break + } + if v_1_0.AuxInt != 63 { + break + } + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpS390XLGDR { + break + } + x := v_1_0_0.Args[0] + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XSLDconst { + break + } + if v_0.AuxInt != 63 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XSRDconst { + break + } + if v_0_0.AuxInt != 63 { + break + } + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpS390XLGDR { + break + } + x := v_0_0_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + c := v_1.AuxInt + if !(c&-1<<63 == 0) { + break + } + 
v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (MOVDconst [c]) (SLDconst [63] (SRDconst [63] (LGDR x)))) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpS390XSLDconst { + break + } + if v_1.AuxInt != 63 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XSRDconst { + break + } + if v_1_0.AuxInt != 63 { + break + } + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpS390XLGDR { + break + } + x := v_1_0_0.Args[0] + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueS390X_OpS390XOR_10(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR (MOVDconst [c]) (MOVDconst [d])) // cond: // result: (MOVDconst [c|d]) @@ -19823,13 +21021,6 @@ func rewriteValueS390X_OpS390XOR_0(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_10(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR x g:(MOVDload [off] {sym} ptr mem)) // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g) // result: (ORload [off] {sym} x ptr mem) @@ -20005,6 +21196,13 @@ func rewriteValueS390X_OpS390XOR_10(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_20(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem)) // cond: i1 == i0+2 && p.Op 
!= OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem) @@ -20344,13 +21542,6 @@ func rewriteValueS390X_OpS390XOR_10(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_20(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVHZload [i0] {s} p mem)) y) @@ -20607,6 +21798,13 @@ func rewriteValueS390X_OpS390XOR_20(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_30(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVWZload [i0] {s} p mem)) y) @@ -20941,13 +22139,6 @@ func rewriteValueS390X_OpS390XOR_20(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_30(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} p idx mem)) // cond: i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && 
mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem) @@ -21164,6 +22355,13 @@ func rewriteValueS390X_OpS390XOR_30(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_40(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem) @@ -21488,13 +22686,6 @@ func rewriteValueS390X_OpS390XOR_30(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_40(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} idx p mem)) // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem) @@ -21711,6 +22902,13 @@ func rewriteValueS390X_OpS390XOR_40(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_50(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem) @@ -22050,13 +23248,6 @@ func rewriteValueS390X_OpS390XOR_40(v *Value) bool { v0.AddArg(y) return true } - return false -} -func 
rewriteValueS390X_OpS390XOR_50(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y)) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -22333,6 +23524,13 @@ func rewriteValueS390X_OpS390XOR_50(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_60(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -22747,13 +23945,6 @@ func rewriteValueS390X_OpS390XOR_50(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_60(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -23030,6 +24221,13 @@ 
func rewriteValueS390X_OpS390XOR_60(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_70(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -23444,13 +24642,6 @@ func rewriteValueS390X_OpS390XOR_60(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_70(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -23727,6 +24918,13 @@ func rewriteValueS390X_OpS390XOR_70(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_80(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // 
result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -24141,13 +25339,6 @@ func rewriteValueS390X_OpS390XOR_70(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_80(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -24378,6 +25569,13 @@ func rewriteValueS390X_OpS390XOR_80(v *Value) bool { v0.AddArg(v1) return true } + return false +} +func rewriteValueS390X_OpS390XOR_90(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem)) @@ -24749,13 +25947,6 @@ func rewriteValueS390X_OpS390XOR_80(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_90(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))) s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) 
&& clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y) @@ -25044,6 +26235,13 @@ func rewriteValueS390X_OpS390XOR_90(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_100(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y) @@ -25398,13 +26596,6 @@ func rewriteValueS390X_OpS390XOR_90(v *Value) bool { v0.AddArg(v1) return true } - return false -} -func rewriteValueS390X_OpS390XOR_100(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} p idx mem)) // cond: p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem)) @@ -25637,6 +26828,13 @@ func rewriteValueS390X_OpS390XOR_100(v *Value) bool { v0.AddArg(v1) return true } + return false +} +func rewriteValueS390X_OpS390XOR_110(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && 
mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem)) @@ -26021,13 +27219,6 @@ func rewriteValueS390X_OpS390XOR_100(v *Value) bool { v0.AddArg(v1) return true } - return false -} -func rewriteValueS390X_OpS390XOR_110(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem)) @@ -26278,6 +27469,13 @@ func rewriteValueS390X_OpS390XOR_110(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_120(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem)))) // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem) @@ -26659,13 +27857,6 @@ func rewriteValueS390X_OpS390XOR_110(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_120(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y)) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && 
s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -26950,6 +28141,13 @@ func rewriteValueS390X_OpS390XOR_120(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_130(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -27376,13 +28574,6 @@ func rewriteValueS390X_OpS390XOR_120(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_130(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -27667,6 +28858,13 @@ func rewriteValueS390X_OpS390XOR_130(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_140(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx 
[i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -28133,13 +29331,6 @@ func rewriteValueS390X_OpS390XOR_130(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_140(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) @@ -28456,6 +29647,13 @@ func rewriteValueS390X_OpS390XOR_140(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_150(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] 
(MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) @@ -28930,13 +30128,6 @@ func rewriteValueS390X_OpS390XOR_140(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_150(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1) (OR (SLDconst [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) @@ -34431,6 +35622,62 @@ func rewriteValueS390X_OpS390XORWconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XORWload_0(v *Value) bool { + // match: (ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ORWload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XORWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && 
is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XORWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XORconst_0(v *Value) bool { // match: (ORconst [0] x) // cond: @@ -34472,6 +35719,97 @@ func rewriteValueS390X_OpS390XORconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XORload_0(v *Value) bool { + b := v.Block + _ = b + // match: (ORload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (OR x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XOR) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (ORload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (ORload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XORload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + 
ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XORload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XSLD_0(v *Value) bool { // match: (SLD x (MOVDconst [c])) // cond: @@ -34696,6 +36034,38 @@ func rewriteValueS390X_OpS390XSRD_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XSRDconst_0(v *Value) bool { + b := v.Block + _ = b + // match: (SRDconst [1] (SLDconst [1] (LGDR x))) + // cond: + // result: (LGDR (LPDFR x)) + for { + if v.AuxInt != 1 { + break + } + v_0 := v.Args[0] + if v_0.Op != OpS390XSLDconst { + break + } + if v_0.AuxInt != 1 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLGDR { + break + } + t := v_0_0.Type + x := v_0_0.Args[0] + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} func rewriteValueS390X_OpS390XSRW_0(v *Value) bool { // match: (SRW x (MOVDconst [c])) // cond: @@ -35149,6 +36519,62 @@ func rewriteValueS390X_OpS390XSUBWconst_0(v *Value) bool { return true } } +func rewriteValueS390X_OpS390XSUBWload_0(v *Value) bool { + // match: (SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (SUBWload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XSUBWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (SUBWload 
[o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XSUBWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XSUBconst_0(v *Value) bool { // match: (SUBconst [0] x) // cond: @@ -35212,6 +36638,97 @@ func rewriteValueS390X_OpS390XSUBconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XSUBload_0(v *Value) bool { + b := v.Block + _ = b + // match: (SUBload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (SUB x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XSUB) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (SUBload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XSUBload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: 
ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XSUBload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpS390XXOR_0(v *Value) bool { // match: (XOR x (MOVDconst [c])) // cond: isU32Bit(c) @@ -35833,6 +37350,62 @@ func rewriteValueS390X_OpS390XXORWconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XXORWload_0(v *Value) bool { + // match: (XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (XORWload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XXORWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XXORWload) + v.AuxInt = o1 + o2 + v.Aux = mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + 
} + return false +} func rewriteValueS390X_OpS390XXORconst_0(v *Value) bool { // match: (XORconst [0] x) // cond: @@ -35863,6 +37436,97 @@ func rewriteValueS390X_OpS390XXORconst_0(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XXORload_0(v *Value) bool { + b := v.Block + _ = b + // match: (XORload [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (XOR x (LGDR y)) + for { + t := v.Type + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + ptr1 := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpS390XFMOVDstore { + break + } + if v_2.AuxInt != off { + break + } + if v_2.Aux != sym { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + y := v_2.Args[1] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpS390XXOR) + v.AddArg(x) + v0 := b.NewValue0(v.Pos, OpS390XLGDR, t) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (XORload [off1] {sym} x (ADDconst [off2] ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(off1+off2) + // result: (XORload [off1+off2] {sym} x ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XADDconst { + break + } + off2 := v_1.AuxInt + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(off1+off2)) { + break + } + v.reset(OpS390XXORload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) + // cond: ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) + // result: (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) + for { + o1 := v.AuxInt + s1 := v.Aux + _ = v.Args[2] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDaddr { + break + } + o2 := v_1.AuxInt + s2 := v_1.Aux + ptr := v_1.Args[0] + mem := v.Args[2] + if !(ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2)) { + break + } + v.reset(OpS390XXORload) + v.AuxInt = o1 + o2 + v.Aux = 
mergeSym(s1, s2) + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} func rewriteValueS390X_OpSelect0_0(v *Value) bool { b := v.Block _ = b diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go index 9d5b34a0b1..13ebc79feb 100644 --- a/src/cmd/internal/obj/s390x/a.out.go +++ b/src/cmd/internal/obj/s390x/a.out.go @@ -283,12 +283,15 @@ const ( AFNEGS ALEDBR ALDEBR + ALPDFR + ALNDFR AFSUB AFSUBS AFSQRT AFSQRTS AFIEBR AFIDBR + ACPSDR // move from GPR to FPR and vice versa ALDGR diff --git a/src/cmd/internal/obj/s390x/anames.go b/src/cmd/internal/obj/s390x/anames.go index 42a0222b57..ef360551ed 100644 --- a/src/cmd/internal/obj/s390x/anames.go +++ b/src/cmd/internal/obj/s390x/anames.go @@ -81,12 +81,15 @@ var Anames = []string{ "FNEGS", "LEDBR", "LDEBR", + "LPDFR", + "LNDFR", "FSUB", "FSUBS", "FSQRT", "FSQRTS", "FIEBR", "FIDBR", + "CPSDR", "LDGR", "LGDR", "CEFBRA", diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index e108032c5c..a8f05807a1 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -212,6 +212,7 @@ var optab = []Optab{ Optab{ACEFBRA, C_REG, C_NONE, C_NONE, C_FREG, 82, 0}, Optab{ACFEBRA, C_FREG, C_NONE, C_NONE, C_REG, 83, 0}, Optab{AFIEBR, C_SCON, C_FREG, C_NONE, C_FREG, 48, 0}, + Optab{ACPSDR, C_FREG, C_FREG, C_NONE, C_FREG, 49, 0}, // load symbol address (plus offset) Optab{AMOVD, C_SYMADDR, C_NONE, C_NONE, C_REG, 19, 0}, @@ -897,6 +898,8 @@ func buildop(ctxt *obj.Link) { opset(ABCL, r) case AFABS: opset(AFNABS, r) + opset(ALPDFR, r) + opset(ALNDFR, r) opset(AFNEG, r) opset(AFNEGS, r) opset(ALEDBR, r) @@ -3182,6 +3185,10 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { opcode = op_LPDBR case AFNABS: opcode = op_LNDBR + case ALPDFR: + opcode = op_LPDFR + case ALNDFR: + opcode = op_LNDFR case AFNEG: opcode = op_LCDFR case AFNEGS: @@ -3281,6 +3288,9 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { } zRRF(opcode, 
uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm) + case 49: // copysign + zRRF(op_CPSDR, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(p.Reg), asm) + case 67: // fmov $0 freg var opcode uint32 switch p.As { -- 2.48.1