From 95dda75bde8a6625b857b5f8013eed5e9563fd02 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Thu, 18 Oct 2018 05:11:38 +0000
Subject: [PATCH] cmd/compile: optimize store combination on 386/amd64

This CL adds rules to combine adjacent byte/word/long stores into
wider stores on 386 and amd64.

Change-Id: Iffd9cda42f1961680c81def4edc773ad58f211b3
Reviewed-on: https://go-review.googlesource.com/c/143057
Run-TryBot: Ben Shi
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/ssa/gen/386.rules   |  10 +
 src/cmd/compile/internal/ssa/gen/AMD64.rules |  19 ++
 src/cmd/compile/internal/ssa/rewrite386.go   |  62 +++++
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 236 ++++++++++++++++++-
 test/codegen/memcombine.go                   |   6 +
 5 files changed, 332 insertions(+), 1 deletion(-)

diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 3a54a2a206..2241a65d55 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -1127,11 +1127,21 @@
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && clobber(x)
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && clobber(x)
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
 (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
   && x.Uses == 1
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index adb94c3bfe..7bba4bcccb 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -2125,16 +2125,31 @@
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && clobber(x)
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && clobber(x)
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
 (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
+  && clobber(x)
+  -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
   && config.useSSE
   && x.Uses == 1
@@ -2176,6 +2191,10 @@
   && x.Uses == 1
   && clobber(x)
   -> (MOVWstore [i-1] {s} p w mem)
+(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
+  && x.Uses == 1
+  && clobber(x)
+  -> (MOVWstore [i] {s} p w mem)
 (MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
   && x.Uses == 1
   && clobber(x)
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index 2fce9c151e..61bdfcbcbb 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -5916,6 +5916,37 @@ func rewriteValue386_Op386MOVBstoreconst_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+	for {
+		a := v.AuxInt
+		s := v.Aux
+		_ = v.Args[1]
+		p := v.Args[0]
+		x := v.Args[1]
+		if x.Op != Op386MOVBstoreconst {
+			break
+		}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[1]
+		if p != x.Args[0] {
+			break
+		}
+		mem := x.Args[1]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstoreconst)
+		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386MOVBstoreconstidx1_0(v *Value) bool {
@@ -11729,6 +11760,37 @@ func rewriteValue386_Op386MOVWstoreconst_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+	for {
+		a := v.AuxInt
+		s := v.Aux
+		_ = v.Args[1]
+		p := v.Args[0]
+		x := v.Args[1]
+		if x.Op != Op386MOVWstoreconst {
+			break
+		}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[1]
+		if p != x.Args[0] {
+			break
+		}
+		mem := x.Args[1]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVLstoreconst)
+		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386MOVWstoreconstidx1_0(v *Value) bool {
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 254c40a4ea..c7aa87b956 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -248,7 +248,7 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpAMD64MOVBloadidx1:
 		return rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v)
 	case OpAMD64MOVBstore:
-		return rewriteValueAMD64_OpAMD64MOVBstore_0(v) || rewriteValueAMD64_OpAMD64MOVBstore_10(v) || rewriteValueAMD64_OpAMD64MOVBstore_20(v)
+		return rewriteValueAMD64_OpAMD64MOVBstore_0(v) || rewriteValueAMD64_OpAMD64MOVBstore_10(v) || rewriteValueAMD64_OpAMD64MOVBstore_20(v) || rewriteValueAMD64_OpAMD64MOVBstore_30(v)
 	case OpAMD64MOVBstoreconst:
 		return rewriteValueAMD64_OpAMD64MOVBstoreconst_0(v)
 	case OpAMD64MOVBstoreconstidx1:
 		return rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v)
@@ -13556,6 +13556,141 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: (MOVWstore [i] {s} p w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		_ = v.Args[2]
+		p := v.Args[0]
+		w := v.Args[1]
+		x := v.Args[2]
+		if x.Op != OpAMD64MOVBstore {
+			break
+		}
+		if x.AuxInt != i+1 {
+			break
+		}
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[2]
+		if p != x.Args[0] {
+			break
+		}
+		x_1 := x.Args[1]
+		if x_1.Op != OpAMD64SHRWconst {
+			break
+		}
+		if x_1.AuxInt != 8 {
+			break
+		}
+		if w != x_1.Args[0] {
+			break
+		}
+		mem := x.Args[2]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(w)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: (MOVWstore [i] {s} p w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		_ = v.Args[2]
+		p := v.Args[0]
+		w := v.Args[1]
+		x := v.Args[2]
+		if x.Op != OpAMD64MOVBstore {
+			break
+		}
+		if x.AuxInt != i+1 {
+			break
+		}
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[2]
+		if p != x.Args[0] {
+			break
+		}
+		x_1 := x.Args[1]
+		if x_1.Op != OpAMD64SHRLconst {
+			break
+		}
+		if x_1.AuxInt != 8 {
+			break
+		}
+		if w != x_1.Args[0] {
+			break
+		}
+		mem := x.Args[2]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(w)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: (MOVWstore [i] {s} p w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		_ = v.Args[2]
+		p := v.Args[0]
+		w := v.Args[1]
+		x := v.Args[2]
+		if x.Op != OpAMD64MOVBstore {
+			break
+		}
+		if x.AuxInt != i+1 {
+			break
+		}
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[2]
+		if p != x.Args[0] {
+			break
+		}
+		x_1 := x.Args[1]
+		if x_1.Op != OpAMD64SHRQconst {
+			break
+		}
+		if x_1.AuxInt != 8 {
+			break
+		}
+		if w != x_1.Args[0] {
+			break
+		}
+		mem := x.Args[2]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(w)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: (MOVWstore [i-1] {s} p w0 mem)
@@ -13723,6 +13858,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64MOVBstore_30(v *Value) bool {
 	// match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
 	// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
 	// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
@@ -13910,6 +14048,37 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+	for {
+		a := v.AuxInt
+		s := v.Aux
+		_ = v.Args[1]
+		p := v.Args[0]
+		x := v.Args[1]
+		if x.Op != OpAMD64MOVBstoreconst {
+			break
+		}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[1]
+		if p != x.Args[0] {
+			break
+		}
+		mem := x.Args[1]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVWstoreconst)
+		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
 	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
 	// result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
@@ -17553,6 +17722,40 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+	// result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+	for {
+		a := v.AuxInt
+		s := v.Aux
+		_ = v.Args[1]
+		p := v.Args[0]
+		x := v.Args[1]
+		if x.Op != OpAMD64MOVLstoreconst {
+			break
+		}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[1]
+		if p != x.Args[0] {
+			break
+		}
+		mem := x.Args[1]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVQstore)
+		v.AuxInt = ValAndOff(a).Off()
+		v.Aux = s
+		v.AddArg(p)
+		v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+		v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+		v.AddArg(v0)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
 	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
 	// result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
@@ -23867,6 +24070,37 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+	for {
+		a := v.AuxInt
+		s := v.Aux
+		_ = v.Args[1]
+		p := v.Args[0]
+		x := v.Args[1]
+		if x.Op != OpAMD64MOVWstoreconst {
+			break
+		}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
+		}
+		_ = x.Args[1]
+		if p != x.Args[0] {
+			break
+		}
+		mem := x.Args[1]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+			break
+		}
+		v.reset(OpAMD64MOVLstoreconst)
+		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+		v.Aux = s
+		v.AddArg(p)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
 	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
 	// result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
index db49142e25..b3d2cb2067 100644
--- a/test/codegen/memcombine.go
+++ b/test/codegen/memcombine.go
@@ -434,6 +434,7 @@ func store_le_byte_2(b []byte, val uint16) {
 func store_le_byte_2_inv(b []byte, val uint16) {
 	_ = b[2]
 	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
 	b[2], b[1] = byte(val>>8), byte(val)
 }
@@ -542,6 +543,8 @@ func zero_byte_2(b1, b2 []byte) {
 	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
 	b1[0], b1[1] = 0, 0
 	// arm64:"MOVH\tZR",-"MOVB"
+	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
 	b2[1], b2[0] = 0, 0
 }
@@ -596,6 +599,8 @@ func zero_uint16_2(h1, h2 []uint16) {
 	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
 	h1[0], h1[1] = 0, 0
 	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
 	h2[1], h2[0] = 0, 0
 }
@@ -620,6 +625,7 @@ func zero_uint32_2(w1, w2 []uint32) {
 	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
 	w1[0], w1[1] = 0, 0
 	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
 	w2[1], w2[0] = 0, 0
 }
-- 
2.50.0
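
P.S. For readers who want to see the rules fire, here is a small, self-contained
sketch of the Go-source patterns the new rules target. It is not part of the
patch or its codegen tests, and the function names are invented for
illustration; whether a combine actually fires depends on the side conditions
in the rules above (the partial store has a single use, the offsets are
adjacent, and clobber(x) succeeds).

package main

import "fmt"

// zeroPair stores two adjacent constant bytes, in descending index order.
// With the new MOVBstoreconst + MOVBstoreconst -> MOVWstoreconst rule,
// this pair can be emitted as a single `MOVW $0, (ptr)` on 386 and amd64;
// the other assignment order was already handled by an existing rule.
func zeroPair(b []byte) {
	_ = b[1] // bounds-check hint: both stores are provably in range
	b[1], b[0] = 0, 0
}

// splitStore stores a 16-bit value one byte at a time, little-endian.
// The new MOVBstore [i] p w + MOVBstore [i+1] p (SHR(W|L|Q)const [8] w)
// rule can rewrite the pair into a single MOVWstore of w; depending on
// the order the stores reach SSA, either this rule or its pre-existing
// mirror (the [i]/[i-1] form) applies.
func splitStore(b []byte, w uint16) {
	_ = b[1]
	b[0] = byte(w)      // low byte at offset 0
	b[1] = byte(w >> 8) // high byte at offset 1
}

func main() {
	buf := make([]byte, 2)
	splitStore(buf, 0x1234)
	fmt.Println(buf) // [52 18], i.e. 0x34 0x12 little-endian
	zeroPair(buf)
	fmt.Println(buf) // [0 0]
}

Building with `go build -gcflags=-S` on amd64 or 386 should show a single MOVW
per pair instead of two MOVBs, matching the patterns asserted in
test/codegen/memcombine.go above.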