From 4e2b84ffc59610c380df3354239f037df4d3e2b2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20M=C3=B6hrmann?= Date: Mon, 9 Sep 2019 17:50:35 +0200 Subject: [PATCH] compile: prefer an AND instead of SHR+SHL instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit On modern 64-bit CPUs a SHR, SHL or AND instruction takes 1 cycle to execute. A pair of shifts operating on the same register takes 2 cycles and must wait for the input register value to be available. Large constants used to mask the high bits of a register with an AND instruction cannot be encoded as an immediate in the AND instruction on amd64 and therefore need to be loaded into a register with a MOV instruction. However, that MOV instruction is not dependent on the output register and on many CPUs does not compete with the AND or shift instructions for execution ports. Using a pair of shifts instead of an AND to mask the high bits of a register has a shorter encoding and uses one less general-purpose register, but it is one clock cycle slower as long as there is no register pressure that would force the AND variant to generate a spill. For example, the instructions emitted for (x & 1 << 63) before this CL are: 48c1ea3f SHRQ $0x3f, DX 48c1e23f SHLQ $0x3f, DX After this CL the instructions are the same ones GCC and LLVM use: 48b80000000000000080 MOVQ $0x8000000000000000, AX 4821d0 ANDQ DX, AX Some platforms such as arm64 already have SSA optimization rules to fuse two shift instructions back into an AND. Removing the general rule to rewrite AND to SHR+SHL speeds up this benchmark: var GlobalU uint func BenchmarkAndHighBits(b *testing.B) { x := uint(0) for i := 0; i < b.N; i++ { x &= 1 << 63 } GlobalU = x } amd64/darwin on Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz: name old time/op new time/op delta AndHighBits-4 0.61ns ± 6% 0.42ns ± 6% -31.42% (p=0.000 n=25+25): 'go run run.go -all_codegen -v codegen' passes with the following adjustments: ARM64: The BFXIL pattern ((x << lc) >> rc | y & ac) needed adjustment since ORshiftRL generation fusing '>> rc' and '|' interferes with matching ((x << lc) >> rc) to generate UBFX. Previously ORshiftLL was created first using the shifts generated for (y & ac). S390X: Add rules for abs and copysign to match the use of AND instead of SHIFTs. Updates #33826 Updates #32781 Change-Id: I43227da76b625de03fbc51117162b23b9c678cdb Reviewed-on: https://go-review.googlesource.com/c/go/+/194297 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/ARM64.rules | 7 +- src/cmd/compile/internal/ssa/gen/S390X.rules | 4 + .../compile/internal/ssa/gen/generic.rules | 8 - src/cmd/compile/internal/ssa/rewriteARM64.go | 27 + src/cmd/compile/internal/ssa/rewriteS390X.go | 754 ++++++++++++++---- .../compile/internal/ssa/rewritegeneric.go | 114 +-- test/codegen/math.go | 4 +- 7 files changed, 625 insertions(+), 293 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index d4b47bfb0b..ddb504531c 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -1863,9 +1863,8 @@ (XORshiftLL [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c) -> (EXTRWconst [32-c] x2 x) -// Generic rules rewrite certain AND to a pair of shifts. -// However, on ARM64 the bitmask can fit into an instruction. -// Rewrite it back to AND.
+// Rewrite special pairs of shifts to AND. +// On ARM64 the bitmask can fit into an instruction. (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1< (ANDconst [^(1< (BFXIL [bfc] y x) (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc) -> (BFXIL [bfc] y x) +(ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1< (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x) // do combined loads // little endian loads diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules index 98bf875f80..6d8ce4b107 100644 --- a/src/cmd/compile/internal/ssa/gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/gen/S390X.rules @@ -701,6 +701,8 @@ // may need to be reworked when NIHH/OIHH are added (SRDconst [1] (SLDconst [1] (LGDR x))) -> (LGDR (LPDFR x)) (LDGR (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR x)) +(AND (MOVDconst [^(-1<<63)]) (LGDR x)) -> (LGDR (LPDFR x)) +(LDGR (AND (MOVDconst [^(-1<<63)]) x)) -> (LPDFR (LDGR x)) (OR (MOVDconst [-1<<63]) (LGDR x)) -> (LGDR (LNDFR x)) (LDGR (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR x)) @@ -710,6 +712,8 @@ // detect copysign (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) -> (LGDR (CPSDR y x)) (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR (FMOVDconst [c]) x)) +(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) -> (LGDR (CPSDR y x)) +(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR (FMOVDconst [c]) x)) (CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y) (CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y) diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index ef5d7a63ff..8696464a70 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -542,14 +542,6 @@ (Slicemask (Const64 [x])) && x > 0 -> (Const64 [-1]) (Slicemask (Const64 [0])) -> (Const64 [0]) -// Rewrite AND of consts as shifts if possible, slightly faster for 64 bit operands -// leading zeros can be shifted left, then right -(And64 (Const64 [y]) x) && nlz(y) + nto(y) == 64 && nto(y) >= 32 - -> (Rsh64Ux64 (Lsh64x64 x (Const64 [nlz(y)])) (Const64 [nlz(y)])) -// trailing zeros can be shifted right, then left -(And64 (Const64 [y]) x) && nlo(y) + ntz(y) == 64 && ntz(y) >= 32 - -> (Lsh64x64 (Rsh64Ux64 x (Const64 [ntz(y)])) (Const64 [ntz(y)])) - // simplifications often used for lengths. e.g. 
len(s[i:i+5])==5 (Sub(64|32|16|8) (Add(64|32|16|8) x y) x) -> y (Sub(64|32|16|8) (Add(64|32|16|8) x y) y) -> x diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index fc677726dc..06019ebbd8 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -28467,6 +28467,33 @@ func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool { v.AddArg(y) return true } + // match: (ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) + // cond: lc < rc && ac == ^((1< x)) + // cond: + // result: (LGDR (LPDFR x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + if v_0.AuxInt != ^(-1 << 63) { + break + } + v_1 := v.Args[1] + if v_1.Op != OpS390XLGDR { + break + } + t := v_1.Type + x := v_1.Args[0] + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (AND (LGDR x) (MOVDconst [^(-1<<63)])) + // cond: + // result: (LGDR (LPDFR x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { + break + } + t := v_0.Type + x := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + if v_1.AuxInt != ^(-1 << 63) { + break + } + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } // match: (AND (MOVDconst [c]) (MOVDconst [d])) // cond: // result: (MOVDconst [c&d]) @@ -10546,6 +10597,53 @@ func rewriteValueS390X_OpS390XLDGR_0(v *Value) bool { v.AddArg(v0) return true } + // match: (LDGR (AND (MOVDconst [^(-1<<63)]) x)) + // cond: + // result: (LPDFR (LDGR x)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XAND { + break + } + x := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XMOVDconst { + break + } + if v_0_0.AuxInt != ^(-1 << 63) { + break + } + v.reset(OpS390XLPDFR) + v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (LDGR (AND x (MOVDconst [^(-1<<63)]))) + // cond: + // result: (LPDFR (LDGR x)) + for { + t := v.Type + v_0 := v.Args[0] + if v_0.Op != OpS390XAND { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XMOVDconst { + break + } + if v_0_1.AuxInt != ^(-1 << 63) { + break + } + v.reset(OpS390XLPDFR) + v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } // match: (LDGR (OR (MOVDconst [-1<<63]) x)) // cond: // result: (LNDFR (LDGR x)) @@ -22827,147 +22925,463 @@ func rewriteValueS390X_OpS390XOR_0(v *Value) bool { } func rewriteValueS390X_OpS390XOR_10(v *Value) bool { b := v.Block - typ := &b.Func.Config.Types - // match: (OR (MOVDconst [c]) (MOVDconst [d])) + // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) // cond: - // result: (MOVDconst [c|d]) + // result: (LGDR (CPSDR y x)) for { _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpS390XMOVDconst { + if v_0.Op != OpS390XAND { break } - c := v_0.AuxInt + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XMOVDconst { + break + } + if v_0_0.AuxInt != -1<<63 { + break + } + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XLGDR { + break + } + x := v_0_1.Args[0] v_1 := v.Args[1] - if v_1.Op != OpS390XMOVDconst { + if v_1.Op != OpS390XLGDR { break } - d := v_1.AuxInt - v.reset(OpS390XMOVDconst) - v.AuxInt = c | d + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XLPDFR { + break + } + t := v_1_0.Type + y := v_1_0.Args[0] + 
v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (OR (MOVDconst [d]) (MOVDconst [c])) + // match: (OR (AND (LGDR x) (MOVDconst [-1<<63])) (LGDR (LPDFR y))) // cond: - // result: (MOVDconst [c|d]) + // result: (LGDR (CPSDR y x)) for { _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpS390XMOVDconst { + if v_0.Op != OpS390XAND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLGDR { + break + } + x := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XMOVDconst { + break + } + if v_0_1.AuxInt != -1<<63 { break } - d := v_0.AuxInt v_1 := v.Args[1] - if v_1.Op != OpS390XMOVDconst { + if v_1.Op != OpS390XLGDR { break } - c := v_1.AuxInt - v.reset(OpS390XMOVDconst) - v.AuxInt = c | d - return true - } - // match: (OR x x) - // cond: - // result: x - for { - x := v.Args[1] - if x != v.Args[0] { + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XLPDFR { break } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) + t := v_1_0.Type + y := v_1_0.Args[0] + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (OR x g:(MOVDload [off] {sym} ptr mem)) - // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) - // result: (ORload [off] {sym} x ptr mem) + // match: (OR (LGDR (LPDFR y)) (AND (MOVDconst [-1<<63]) (LGDR x))) + // cond: + // result: (LGDR (CPSDR y x)) for { - t := v.Type _ = v.Args[1] - x := v.Args[0] - g := v.Args[1] - if g.Op != OpS390XMOVDload { + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { break } - off := g.AuxInt - sym := g.Aux - mem := g.Args[1] - ptr := g.Args[0] - if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLPDFR { break } - v.reset(OpS390XORload) - v.Type = t - v.AuxInt = off - v.Aux = sym - v.AddArg(x) - v.AddArg(ptr) - v.AddArg(mem) - return true - } - // match: (OR g:(MOVDload [off] {sym} ptr mem) x) - // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) - // result: (ORload [off] {sym} x ptr mem) - for { - t := v.Type - x := v.Args[1] - g := v.Args[0] - if g.Op != OpS390XMOVDload { + t := v_0_0.Type + y := v_0_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XAND { break } - off := g.AuxInt - sym := g.Aux - mem := g.Args[1] - ptr := g.Args[0] - if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XMOVDconst { break } - v.reset(OpS390XORload) - v.Type = t - v.AuxInt = off - v.Aux = sym - v.AddArg(x) - v.AddArg(ptr) - v.AddArg(mem) - return true - } - // match: (OR g:(MOVDload [off] {sym} ptr mem) x) - // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) - // result: (ORload [off] {sym} x ptr mem) - for { - t := v.Type - x := v.Args[1] - g := v.Args[0] - if g.Op != OpS390XMOVDload { + if v_1_0.AuxInt != -1<<63 { break } - off := g.AuxInt - sym := g.Aux - mem := g.Args[1] - ptr := g.Args[0] - if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpS390XLGDR { break } - v.reset(OpS390XORload) - v.Type = t - v.AuxInt = off - v.Aux = sym - v.AddArg(x) - v.AddArg(ptr) - v.AddArg(mem) + x := v_1_1.Args[0] + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (OR 
x g:(MOVDload [off] {sym} ptr mem)) - // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) - // result: (ORload [off] {sym} x ptr mem) + // match: (OR (LGDR (LPDFR y)) (AND (LGDR x) (MOVDconst [-1<<63]))) + // cond: + // result: (LGDR (CPSDR y x)) for { - t := v.Type _ = v.Args[1] - x := v.Args[0] - g := v.Args[1] - if g.Op != OpS390XMOVDload { - break + v_0 := v.Args[0] + if v_0.Op != OpS390XLGDR { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLPDFR { + break + } + t := v_0_0.Type + y := v_0_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XAND { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XLGDR { + break + } + x := v_1_0.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpS390XMOVDconst { + break + } + if v_1_1.AuxInt != -1<<63 { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XAND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XMOVDconst { + break + } + if v_0_0.AuxInt != -1<<63 { + break + } + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XLGDR { + break + } + x := v_0_1.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + c := v_1.AuxInt + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (AND (LGDR x) (MOVDconst [-1<<63])) (MOVDconst [c])) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XAND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpS390XLGDR { + break + } + x := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpS390XMOVDconst { + break + } + if v_0_1.AuxInt != -1<<63 { + break + } + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + c := v_1.AuxInt + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (MOVDconst [c]) (AND (MOVDconst [-1<<63]) (LGDR x))) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpS390XAND { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XMOVDconst { + break + } + if v_1_0.AuxInt != -1<<63 { + break + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpS390XLGDR { + break + } + x := v_1_1.Args[0] + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (MOVDconst [c]) (AND (LGDR x) (MOVDconst [-1<<63]))) + // cond: c & -1<<63 == 0 + // result: (LGDR (CPSDR (FMOVDconst [c]) x)) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != 
OpS390XAND { + break + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpS390XLGDR { + break + } + x := v_1_0.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpS390XMOVDconst { + break + } + if v_1_1.AuxInt != -1<<63 { + break + } + if !(c&-1<<63 == 0) { + break + } + v.reset(OpS390XLGDR) + v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) + v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) + v1.AuxInt = c + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (OR (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (MOVDconst [c|d]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + d := v_1.AuxInt + v.reset(OpS390XMOVDconst) + v.AuxInt = c | d + return true + } + // match: (OR (MOVDconst [d]) (MOVDconst [c])) + // cond: + // result: (MOVDconst [c|d]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpS390XMOVDconst { + break + } + d := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpS390XMOVDconst { + break + } + c := v_1.AuxInt + v.reset(OpS390XMOVDconst) + v.AuxInt = c | d + return true + } + return false +} +func rewriteValueS390X_OpS390XOR_20(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (OR x x) + // cond: + // result: x + for { + x := v.Args[1] + if x != v.Args[0] { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (OR x g:(MOVDload [off] {sym} ptr mem)) + // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) + // result: (ORload [off] {sym} x ptr mem) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + g := v.Args[1] + if g.Op != OpS390XMOVDload { + break + } + off := g.AuxInt + sym := g.Aux + mem := g.Args[1] + ptr := g.Args[0] + if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + break + } + v.reset(OpS390XORload) + v.Type = t + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (OR g:(MOVDload [off] {sym} ptr mem) x) + // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) + // result: (ORload [off] {sym} x ptr mem) + for { + t := v.Type + x := v.Args[1] + g := v.Args[0] + if g.Op != OpS390XMOVDload { + break + } + off := g.AuxInt + sym := g.Aux + mem := g.Args[1] + ptr := g.Args[0] + if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + break + } + v.reset(OpS390XORload) + v.Type = t + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (OR g:(MOVDload [off] {sym} ptr mem) x) + // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) + // result: (ORload [off] {sym} x ptr mem) + for { + t := v.Type + x := v.Args[1] + g := v.Args[0] + if g.Op != OpS390XMOVDload { + break + } + off := g.AuxInt + sym := g.Aux + mem := g.Args[1] + ptr := g.Args[0] + if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)) { + break + } + v.reset(OpS390XORload) + v.Type = t + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (OR x g:(MOVDload [off] {sym} ptr mem)) + // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g) + // result: (ORload [off] {sym} x ptr mem) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + g := v.Args[1] + if g.Op != OpS390XMOVDload { + break } off := g.AuxInt 
sym := g.Aux @@ -23129,11 +23543,6 @@ func rewriteValueS390X_OpS390XOR_10(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_20(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem)) // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem) @@ -23230,6 +23639,11 @@ func rewriteValueS390X_OpS390XOR_20(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_30(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)) x1:(MOVWZload [i1] {s} p mem)) // cond: i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem) @@ -23715,11 +24129,6 @@ func rewriteValueS390X_OpS390XOR_20(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_30(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVWZload [i0] {s} p mem)) y) @@ -23836,6 +24245,11 @@ func rewriteValueS390X_OpS390XOR_30(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_40(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem) @@ -24260,11 +24674,6 @@ func rewriteValueS390X_OpS390XOR_30(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_40(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem) @@ -24371,6 +24780,11 @@ func rewriteValueS390X_OpS390XOR_40(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_50(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem) @@ -24795,11 +25209,6 @@ func rewriteValueS390X_OpS390XOR_40(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func 
rewriteValueS390X_OpS390XOR_50(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem) @@ -24906,6 +25315,11 @@ func rewriteValueS390X_OpS390XOR_50(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_60(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)) x1:(MOVWZloadidx [i1] {s} p idx mem)) // cond: i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem) @@ -25401,11 +25815,6 @@ func rewriteValueS390X_OpS390XOR_50(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_60(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -25542,6 +25951,11 @@ func rewriteValueS390X_OpS390XOR_60(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_70(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -26082,11 +26496,6 @@ func rewriteValueS390X_OpS390XOR_60(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_70(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVHZloadidx [i0] {s} p idx mem)) y) @@ -26222,6 +26631,11 @@ func rewriteValueS390X_OpS390XOR_70(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_80(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y)) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && 
mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -26762,11 +27176,6 @@ func rewriteValueS390X_OpS390XOR_70(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_80(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -26902,6 +27311,11 @@ func rewriteValueS390X_OpS390XOR_80(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_90(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))) // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j1] (MOVWZloadidx [i0] {s} p idx mem)) y) @@ -27398,11 +27812,6 @@ func rewriteValueS390X_OpS390XOR_80(v *Value) bool { v0.AddArg(v1) return true } - return false -} -func rewriteValueS390X_OpS390XOR_90(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem)) @@ -27517,6 +27926,11 @@ func rewriteValueS390X_OpS390XOR_90(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_100(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))) r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))) // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem) @@ -28048,11 +28462,6 @@ func rewriteValueS390X_OpS390XOR_90(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_100(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVWZreg 
(MOVWBRload [i0] {s} p mem))) y) @@ -28181,6 +28590,11 @@ func rewriteValueS390X_OpS390XOR_100(v *Value) bool { v0.AddArg(v1) return true } + return false +} +func rewriteValueS390X_OpS390XOR_110(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem))) // cond: p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem)) @@ -28629,11 +29043,6 @@ func rewriteValueS390X_OpS390XOR_100(v *Value) bool { v0.AddArg(v1) return true } - return false -} -func rewriteValueS390X_OpS390XOR_110(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem)) @@ -28760,6 +29169,11 @@ func rewriteValueS390X_OpS390XOR_110(v *Value) bool { v0.AddArg(v1) return true } + return false +} +func rewriteValueS390X_OpS390XOR_120(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem)))) // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem)) @@ -29258,11 +29672,6 @@ func rewriteValueS390X_OpS390XOR_110(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueS390X_OpS390XOR_120(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem)))) // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem) @@ -29385,6 +29794,11 @@ func rewriteValueS390X_OpS390XOR_120(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValueS390X_OpS390XOR_130(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))) // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh) // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem) @@ -29914,11 +30328,6 @@ func rewriteValueS390X_OpS390XOR_120(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_130(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))) // cond: p.Op != OpSB && i1 
== i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -30059,6 +30468,11 @@ func rewriteValueS390X_OpS390XOR_130(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_140(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -30615,11 +31029,6 @@ func rewriteValueS390X_OpS390XOR_130(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_140(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y) @@ -30767,6 +31176,11 @@ func rewriteValueS390X_OpS390XOR_140(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_150(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y)) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) @@ -31387,11 +31801,6 @@ func rewriteValueS390X_OpS390XOR_140(v *Value) bool { v0.AddArg(y) return true } - return false -} -func rewriteValueS390X_OpS390XOR_150(v *Value) bool { - b := v.Block - typ := &b.Func.Config.Types // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) @@ -31547,6 +31956,11 @@ func rewriteValueS390X_OpS390XOR_150(v *Value) bool { v0.AddArg(y) return true } + return false +} +func rewriteValueS390X_OpS390XOR_160(v *Value) bool { + b := v.Block + typ := 
&b.Func.Config.Types // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))) // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or) // result: @mergePoint(b,x0,x1,y) (OR (SLDconst [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y) diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 8aa07d20db..a2d091d3d6 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -5735,112 +5735,6 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool { v.AddArg(y) return true } - // match: (And64 (Const64 [y]) x) - // cond: nlz(y) + nto(y) == 64 && nto(y) >= 32 - // result: (Rsh64Ux64 (Lsh64x64 x (Const64 [nlz(y)])) (Const64 [nlz(y)])) - for { - t := v.Type - x := v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpConst64 { - break - } - y := v_0.AuxInt - if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) { - break - } - v.reset(OpRsh64Ux64) - v0 := b.NewValue0(v.Pos, OpLsh64x64, t) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpConst64, t) - v1.AuxInt = nlz(y) - v0.AddArg(v1) - v.AddArg(v0) - v2 := b.NewValue0(v.Pos, OpConst64, t) - v2.AuxInt = nlz(y) - v.AddArg(v2) - return true - } - // match: (And64 x (Const64 [y])) - // cond: nlz(y) + nto(y) == 64 && nto(y) >= 32 - // result: (Rsh64Ux64 (Lsh64x64 x (Const64 [nlz(y)])) (Const64 [nlz(y)])) - for { - t := v.Type - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpConst64 { - break - } - y := v_1.AuxInt - if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) { - break - } - v.reset(OpRsh64Ux64) - v0 := b.NewValue0(v.Pos, OpLsh64x64, t) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpConst64, t) - v1.AuxInt = nlz(y) - v0.AddArg(v1) - v.AddArg(v0) - v2 := b.NewValue0(v.Pos, OpConst64, t) - v2.AuxInt = nlz(y) - v.AddArg(v2) - return true - } - // match: (And64 (Const64 [y]) x) - // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32 - // result: (Lsh64x64 (Rsh64Ux64 x (Const64 [ntz(y)])) (Const64 [ntz(y)])) - for { - t := v.Type - x := v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpConst64 { - break - } - y := v_0.AuxInt - if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) { - break - } - v.reset(OpLsh64x64) - v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpConst64, t) - v1.AuxInt = ntz(y) - v0.AddArg(v1) - v.AddArg(v0) - v2 := b.NewValue0(v.Pos, OpConst64, t) - v2.AuxInt = ntz(y) - v.AddArg(v2) - return true - } - // match: (And64 x (Const64 [y])) - // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32 - // result: (Lsh64x64 (Rsh64Ux64 x (Const64 [ntz(y)])) (Const64 [ntz(y)])) - for { - t := v.Type - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpConst64 { - break - } - y := v_1.AuxInt - if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) { - break - } - v.reset(OpLsh64x64) - v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpConst64, t) - v1.AuxInt = ntz(y) - v0.AddArg(v1) - v.AddArg(v0) - v2 := b.NewValue0(v.Pos, OpConst64, t) - v2.AuxInt = ntz(y) - v.AddArg(v2) - return true - } // match: (And64 (And64 i:(Const64 ) z) x) // cond: (z.Op != OpConst64 && x.Op != OpConst64) // result: (And64 i (And64 z x)) @@ -5867,10 +5761,6 @@ func 
rewriteValuegeneric_OpAnd64_10(v *Value) bool { v.AddArg(v0) return true } - return false -} -func rewriteValuegeneric_OpAnd64_20(v *Value) bool { - b := v.Block // match: (And64 (And64 z i:(Const64 )) x) // cond: (z.Op != OpConst64 && x.Op != OpConst64) // result: (And64 i (And64 z x)) @@ -5984,6 +5874,10 @@ func rewriteValuegeneric_OpAnd64_20(v *Value) bool { v.AddArg(x) return true } + return false +} +func rewriteValuegeneric_OpAnd64_20(v *Value) bool { + b := v.Block // match: (And64 (Const64 [c]) (And64 x (Const64 [d]))) // cond: // result: (And64 (Const64 [c&d]) x) diff --git a/test/codegen/math.go b/test/codegen/math.go index 36252710d1..8aa652595b 100644 --- a/test/codegen/math.go +++ b/test/codegen/math.go @@ -81,7 +81,7 @@ func abs32(x float32) float32 { // Check that it's using integer registers func copysign(a, b, c float64) { - // amd64:"BTRQ\t[$]63","SHRQ\t[$]63","SHLQ\t[$]63","ORQ" + // amd64:"BTRQ\t[$]63","ANDQ","ORQ" // s390x:"CPSDR",-"MOVD" (no integer load/store) // ppc64:"FCPSGN" // ppc64le:"FCPSGN" @@ -100,7 +100,7 @@ func copysign(a, b, c float64) { // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store) sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63) - // amd64:-"SHLQ\t[$]1",-"SHRQ\t[$]1","SHRQ\t[$]63","SHLQ\t[$]63","ORQ" + // amd64:"ANDQ","ORQ" // s390x:"CPSDR\t",-"MOVD\t" (no integer load/store) // ppc64:"FCPSGN" // ppc64le:"FCPSGN" -- 2.50.0
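For readers who want to check the equivalences this change relies on outside the compiler, here is a small standalone sketch (not part of the patch; the function names are invented for illustration). It compares the AND form of the high-bit mask with the SHR+SHL pair that the removed generic rules produced for the same expression, and builds copysign out of the same sign-bit masking that the new S390X rules and test/codegen/math.go exercise:

package main

import (
	"fmt"
	"math"
)

// maskHighBitAnd keeps only bit 63 of x using the AND form that the
// compiler now emits directly (MOVQ $0x8000000000000000; ANDQ on amd64).
func maskHighBitAnd(x uint64) uint64 {
	return x & (1 << 63)
}

// maskHighBitShifts keeps only bit 63 of x using the SHR+SHL pair that the
// removed generic rules previously produced for the same expression.
func maskHighBitShifts(x uint64) uint64 {
	return (x >> 63) << 63
}

// copysignBits returns a value with the magnitude of a and the sign of b,
// built from the sign-bit masking pattern exercised in test/codegen/math.go.
func copysignBits(a, b float64) float64 {
	const sign uint64 = 1 << 63
	return math.Float64frombits(math.Float64bits(a)&^sign | math.Float64bits(b)&sign)
}

func main() {
	for _, x := range []uint64{0, 1, 1<<63 - 1, 1 << 63, ^uint64(0)} {
		a, s := maskHighBitAnd(x), maskHighBitShifts(x)
		fmt.Printf("x=%#016x and=%#016x shifts=%#016x equal=%v\n", x, a, s, a == s)
	}
	fmt.Println(copysignBits(3.5, -1) == math.Copysign(3.5, -1)) // true
}

With a toolchain that includes this change, dumping the assembly for the masking functions (for example with 'go build -gcflags=-S') should show the MOVQ/ANDQ sequence quoted in the commit message rather than the SHRQ/SHLQ pair, and the benchmark from the commit message can be run with 'go test -bench=AndHighBits'.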