From: Wei Xiao Date: Mon, 5 Mar 2018 03:50:25 +0000 (+0000) Subject: cmd/compile/internal/ssa: improve store combine optimization on arm64 X-Git-Tag: go1.11beta1~1323 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=05962561ae490ae006377a7b8d0fd1495ad6eb2e;p=gostls13.git cmd/compile/internal/ssa: improve store combine optimization on arm64 Current implementation doesn't consider MOVDreg type operand and fail to combine it into larger store. This patch fixes the issue. Fixes #24242 Change-Id: I7d68697f80e76f48c3528ece01a602bf513248ec Reviewed-on: https://go-review.googlesource.com/98397 Run-TryBot: Giovanni Bajo TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 263ccba548..c72ab54032 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -1520,6 +1520,11 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w mem) +(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem)) + && x.Uses == 1 + && isSamePtr(ptr0, ptr1) + && clobber(x) + -> (MOVHstore [i-1] {s} ptr0 w mem) (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) @@ -1530,6 +1535,11 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w0 mem) +(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) + && x.Uses == 1 + && isSamePtr(ptr0, ptr1) + && clobber(x) + -> (MOVHstore [i-1] {s} ptr0 w0 mem) (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) @@ -1540,6 +1550,11 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstore [i-2] {s} ptr0 w mem) +(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem)) + && x.Uses == 1 + && isSamePtr(ptr0, ptr1) + && clobber(x) + -> (MOVWstore [i-2] {s} ptr0 w mem) (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) @@ -1589,6 +1604,17 @@ && clobber(x1) && clobber(x2) -> (MOVWstore [i-3] {s} ptr (REVW w) mem) +(MOVBstore [i] {s} ptr w + x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) + x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) + x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && clobber(x0) + && clobber(x1) + && clobber(x2) + -> (MOVWstore [i-3] {s} ptr (REVW w) mem) (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) @@ -1608,10 +1634,18 @@ && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) // FP simplification (FNEGS (FMULS x y)) -> (FNMULS x y) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index d7ade01de4..ee92c2f809 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -138,7 +138,7 @@ func rewriteValueARM64(v *Value) bool { case OpARM64MOVBreg: return rewriteValueARM64_OpARM64MOVBreg_0(v) case OpARM64MOVBstore: - return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) + return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || rewriteValueARM64_OpARM64MOVBstore_20(v) case OpARM64MOVBstorezero: return rewriteValueARM64_OpARM64MOVBstorezero_0(v) case OpARM64MOVDload: @@ -158,7 +158,7 @@ func rewriteValueARM64(v *Value) bool { case OpARM64MOVHreg: return rewriteValueARM64_OpARM64MOVHreg_0(v) case OpARM64MOVHstore: - return rewriteValueARM64_OpARM64MOVHstore_0(v) + return rewriteValueARM64_OpARM64MOVHstore_0(v) || rewriteValueARM64_OpARM64MOVHstore_10(v) case OpARM64MOVHstorezero: return rewriteValueARM64_OpARM64MOVHstorezero_0(v) case OpARM64MOVQstorezero: @@ -6221,6 +6221,53 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem)) + // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVHstore [i-1] {s} ptr0 w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr0 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 8 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(ptr0) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w0 mem) @@ -6325,6 +6372,62 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVHstore [i-1] {s} ptr0 w0 mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr0 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + w0 := x.Args[1] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-8 { + break + } + w0_0 := w0.Args[0] + if w0_0.Op != OpARM64MOVDreg { + break + } + if w != w0_0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(ptr0) + v.AddArg(w0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem)))))))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) // result: (MOVDstore [i-7] {s} ptr (REV w) mem) @@ -6623,6 +6726,113 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) + // result: (MOVWstore [i-3] {s} ptr (REVW w) mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + w := v.Args[1] + x0 := v.Args[2] + if x0.Op != OpARM64MOVBstore { + break + } + if x0.AuxInt != i-1 { + break + } + if x0.Aux != s { + break + } + _ = x0.Args[2] + if ptr != x0.Args[0] { + break + } + x0_1 := x0.Args[1] + if x0_1.Op != OpARM64SRLconst { + break + } + if x0_1.AuxInt != 8 { + break + } + x0_1_0 := x0_1.Args[0] + if x0_1_0.Op != OpARM64MOVDreg { + break + } + if w != x0_1_0.Args[0] { + break + } + x1 := x0.Args[2] + if x1.Op != OpARM64MOVBstore { + break + } + if x1.AuxInt != i-2 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[2] + if ptr != x1.Args[0] { + break + } + x1_1 := x1.Args[1] + if x1_1.Op != OpARM64SRLconst { + break + } + if x1_1.AuxInt != 16 { + break + } + x1_1_0 := x1_1.Args[0] + if x1_1_0.Op != OpARM64MOVDreg { + break + } + if w != x1_1_0.Args[0] { + break + } + x2 := x1.Args[2] + if x2.Op != OpARM64MOVBstore { + break + } + if x2.AuxInt != i-3 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[2] + if ptr != x2.Args[0] { + break + } + x2_1 := x2.Args[1] + if x2_1.Op != OpARM64SRLconst { + break + } + if x2_1.AuxInt != 24 { + break + } + x2_1_0 := x2_1.Args[0] + if x2_1_0.Op != OpARM64MOVDreg { + break + } + if w != x2_1_0.Args[0] { + break + } + mem := x2.Args[2] + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) { + break + } + v.reset(OpARM64MOVWstore) + v.AuxInt = i - 3 + v.Aux = s + v.AddArg(ptr) + v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem)))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) // result: (MOVWstore [i-3] {s} ptr (REVW w) mem) @@ -6718,6 +6928,11 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + return false +} +func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { + b := v.Block + _ = b // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -6816,6 +7031,57 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + if ptr != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpARM64SRLconst { + break + } + if x_1.AuxInt != 8 { + break + } + x_1_0 := x_1.Args[0] + if x_1_0.Op != OpARM64MOVDreg { + break + } + if w != x_1_0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpARM64MOVHstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(ptr) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -6867,6 +7133,57 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + if ptr != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpARM64SRLconst { + break + } + if x_1.AuxInt != 8 { + break + } + x_1_0 := x_1.Args[0] + if x_1_0.Op != OpARM64MOVDreg { + break + } + if w != x_1_0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpARM64MOVHstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(ptr) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool { @@ -7821,6 +8138,56 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem)) + // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVWstore [i-2] {s} ptr0 w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr0 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 16 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVHstore { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + break + } + v.reset(OpARM64MOVWstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(ptr0) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool { // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVWstore [i-2] {s} ptr0 w0 mem) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index d08fabf846..9918e7576f 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -110,28 +110,28 @@ func store_le64_idx(b []byte, idx int) { func store_le32(b []byte) { // amd64:`MOVL\s` - // arm64(DISABLED):`MOVW`,-`MOV[BH]` + // arm64:`MOVW`,-`MOV[BH]` // ppc64le:`MOVW\s` binary.LittleEndian.PutUint32(b, sink32) } func store_le32_idx(b []byte, idx int) { // amd64:`MOVL\s` - // arm64(DISABLED):`MOVW`,-`MOV[BH]` + // arm64:`MOVW`,-`MOV[BH]` // ppc64le:`MOVW\s` binary.LittleEndian.PutUint32(b[idx:], sink32) } func store_le16(b []byte) { // amd64:`MOVW\s` - // arm64(DISABLED):`MOVH`,-`MOVB` + // arm64:`MOVH`,-`MOVB` // ppc64le(DISABLED):`MOVH\s` binary.LittleEndian.PutUint16(b, sink16) } func store_le16_idx(b []byte, idx int) { // amd64:`MOVW\s` - // arm64(DISABLED):`MOVH`,-`MOVB` + // arm64:`MOVH`,-`MOVB` // ppc64le(DISABLED):`MOVH\s` binary.LittleEndian.PutUint16(b[idx:], sink16) } @@ -150,24 +150,24 @@ func store_be64_idx(b []byte, idx int) { func store_be32(b []byte) { // amd64:`BSWAPL`,-`SHR.` - // arm64(DISABLED):`MOVW`,`REVW`,-`MOV[BH]` + // arm64:`MOVW`,`REVW`,-`MOV[BH]` binary.BigEndian.PutUint32(b, sink32) } func store_be32_idx(b []byte, idx int) { // amd64:`BSWAPL`,-`SHR.` - // arm64(DISABLED):`MOVW`,`REVW`,-`MOV[BH]` + // arm64:`MOVW`,`REVW`,-`MOV[BH]` binary.BigEndian.PutUint32(b[idx:], sink32) } func store_be16(b []byte) { // amd64:`ROLW\s\$8`,-`SHR.` - // arm64(DISABLED):`MOVH`,`REV16W`,-`MOVB` + // arm64:`MOVH`,`REV16W`,-`MOVB` binary.BigEndian.PutUint16(b, sink16) } func store_be16_idx(b []byte, idx int) { // amd64:`ROLW\s\$8`,-`SHR.` - // arm64(DISABLED):`MOVH`,`REV16W`,-`MOVB` + // arm64:`MOVH`,`REV16W`,-`MOVB` binary.BigEndian.PutUint16(b[idx:], sink16) }