From: Ben Shi Date: Tue, 25 Apr 2017 10:53:10 +0000 (+0000) Subject: cmd/compile/internal/ssa: more constant folding rules for ARM X-Git-Tag: go1.9beta1~385 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=38fbada5579823378217d57671de54fc5ebfa8c6;p=gostls13.git cmd/compile/internal/ssa: more constant folding rules for ARM (ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (SUBconst [int64(int32(-c))] x) (SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x) Currently a = a + 0xfffffff1 is compiled to (variable a is in R0) MVN $14, R11 ADD R11, R0, R0 After applying the above 2 rules, it becomes SUB $15, R0, R0 (BICconst [c] (BICconst [d] x)) -> (BICconst [int64(int32(c|d))] x) This rule also optimizes the generated ARM code. The other rules are added to avoid generating less optimized ARM code when ADD->SUB substitutions happen. Change-Id: I3ead9aae2b446b674e2ab42d37259d38ceb93a4d Reviewed-on: https://go-review.googlesource.com/41679 Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules index 087359d3a4..e24cede540 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM.rules @@ -430,21 +430,34 @@ // fold offset into address (ADDconst [off1] (MOVWaddr [off2] {sym} ptr)) -> (MOVWaddr [off1+off2] {sym} ptr) +(SUBconst [off1] (MOVWaddr [off2] {sym} ptr)) -> (MOVWaddr [off2-off1] {sym} ptr) // fold address into load/store (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBload [off1+off2] {sym} ptr mem) +(MOVBload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVBload [off1-off2] {sym} ptr mem) (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBUload [off1+off2] {sym} ptr mem) +(MOVBUload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVBUload [off1-off2] {sym} ptr mem) (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHload [off1+off2] {sym} 
ptr mem) +(MOVHload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVHload [off1-off2] {sym} ptr mem) (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHUload [off1+off2] {sym} ptr mem) +(MOVHUload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVHUload [off1-off2] {sym} ptr mem) (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVWload [off1+off2] {sym} ptr mem) +(MOVWload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVWload [off1-off2] {sym} ptr mem) (MOVFload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVFload [off1+off2] {sym} ptr mem) +(MOVFload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVFload [off1-off2] {sym} ptr mem) (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVDload [off1+off2] {sym} ptr mem) +(MOVDload [off1] {sym} (SUBconst [off2] ptr) mem) -> (MOVDload [off1-off2] {sym} ptr mem) (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVBstore [off1+off2] {sym} ptr val mem) +(MOVBstore [off1] {sym} (SUBconst [off2] ptr) val mem) -> (MOVBstore [off1-off2] {sym} ptr val mem) (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVHstore [off1+off2] {sym} ptr val mem) +(MOVHstore [off1] {sym} (SUBconst [off2] ptr) val mem) -> (MOVHstore [off1-off2] {sym} ptr val mem) (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVWstore [off1+off2] {sym} ptr val mem) +(MOVWstore [off1] {sym} (SUBconst [off2] ptr) val mem) -> (MOVWstore [off1-off2] {sym} ptr val mem) (MOVFstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVFstore [off1+off2] {sym} ptr val mem) +(MOVFstore [off1] {sym} (SUBconst [off2] ptr) val mem) -> (MOVFstore [off1-off2] {sym} ptr val mem) (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVDstore [off1+off2] {sym} ptr val mem) +(MOVDstore [off1] {sym} (SUBconst [off2] ptr) val mem) -> (MOVDstore [off1-off2] {sym} ptr val mem) (MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) -> (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -783,6 +796,8 @@ 
(BICconst [c] _) && int32(c)==-1 -> (MOVWconst [0]) // generic constant folding +(ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (SUBconst [int64(int32(-c))] x) +(SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x) (ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))]) (ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x) (ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x) @@ -815,6 +830,7 @@ (XORconst [c] (MOVWconst [d])) -> (MOVWconst [c^d]) (XORconst [c] (XORconst [d] x)) -> (XORconst [c^d] x) (BICconst [c] (MOVWconst [d])) -> (MOVWconst [d&^c]) +(BICconst [c] (BICconst [d] x)) -> (BICconst [int64(int32(c|d))] x) (MVN (MOVWconst [c])) -> (MOVWconst [^c]) (MOVBreg (MOVWconst [c])) -> (MOVWconst [int64(int8(c))]) (MOVBUreg (MOVWconst [c])) -> (MOVWconst [int64(uint8(c))]) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 68f3a675a6..ccd723388d 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -624,3 +624,15 @@ func reciprocalExact32(c float32) bool { return true } } + +// check if an immediate can be directly encoded into an ARM's instruction +func isARMImmRot(v uint32) bool { + for i := 0; i < 16; i++ { + if v&^0xff == 0 { + return true + } + v = v<<2 | v>>30 + } + + return false +} diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index 1176969310..505c96a54d 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -2444,6 +2444,20 @@ func rewriteValueARM_OpARMADDconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDconst [c] x) + // cond: !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) + // result: (SUBconst [int64(int32(-c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(!isARMImmRot(uint32(c)) && 
isARMImmRot(uint32(-c))) { + break + } + v.reset(OpARMSUBconst) + v.AuxInt = int64(int32(-c)) + v.AddArg(x) + return true + } // match: (ADDconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [int64(int32(c+d))]) @@ -3720,6 +3734,22 @@ func rewriteValueARM_OpARMBICconst_0(v *Value) bool { v.AuxInt = d &^ c return true } + // match: (BICconst [c] (BICconst [d] x)) + // cond: + // result: (BICconst [int64(int32(c|d))] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpARMBICconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpARMBICconst) + v.AuxInt = int64(int32(c | d)) + v.AddArg(x) + return true + } return false } func rewriteValueARM_OpARMBICshiftLL_0(v *Value) bool { @@ -5477,6 +5507,26 @@ func rewriteValueARM_OpARMMOVBUload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBUload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVBUload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVBUload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVBUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -5602,6 +5652,26 @@ func rewriteValueARM_OpARMMOVBload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVBload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVBload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // 
result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -5732,6 +5802,28 @@ func rewriteValueARM_OpARMMOVBstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [off1] {sym} (SUBconst [off2] ptr) val mem) + // cond: + // result: (MOVBstore [off1-off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpARMMOVBstore) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVBstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -5865,6 +5957,26 @@ func rewriteValueARM_OpARMMOVDload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVDload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVDload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVDload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -5937,6 +6049,28 @@ func rewriteValueARM_OpARMMOVDstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDstore [off1] {sym} (SUBconst [off2] ptr) val mem) + // cond: + // result: (MOVDstore [off1-off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpARMMOVDstore) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + 
return true + } // match: (MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -5986,6 +6120,26 @@ func rewriteValueARM_OpARMMOVFload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVFload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVFload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVFload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVFload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -6058,6 +6212,28 @@ func rewriteValueARM_OpARMMOVFstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVFstore [off1] {sym} (SUBconst [off2] ptr) val mem) + // cond: + // result: (MOVFstore [off1-off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpARMMOVFstore) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVFstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) // result: (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -6107,6 +6283,26 @@ func rewriteValueARM_OpARMMOVHUload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHUload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVHUload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + 
v.reset(OpARMMOVHUload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVHUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -6256,6 +6452,26 @@ func rewriteValueARM_OpARMMOVHload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVHload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVHload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVHload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -6434,6 +6650,28 @@ func rewriteValueARM_OpARMMOVHstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [off1] {sym} (SUBconst [off2] ptr) val mem) + // cond: + // result: (MOVHstore [off1-off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpARMMOVHstore) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVHstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -6529,6 +6767,26 @@ func rewriteValueARM_OpARMMOVWload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWload [off1] {sym} (SUBconst [off2] ptr) mem) + // cond: + // result: (MOVWload [off1-off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + 
break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + v.reset(OpARMMOVWload) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVWload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -7047,6 +7305,28 @@ func rewriteValueARM_OpARMMOVWstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstore [off1] {sym} (SUBconst [off2] ptr) val mem) + // cond: + // result: (MOVWstore [off1-off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpARMSUBconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + v.reset(OpARMMOVWstore) + v.AuxInt = off1 - off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVWstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -11937,6 +12217,24 @@ func rewriteValueARM_OpARMSUBSshiftRLreg_0(v *Value) bool { return false } func rewriteValueARM_OpARMSUBconst_0(v *Value) bool { + // match: (SUBconst [off1] (MOVWaddr [off2] {sym} ptr)) + // cond: + // result: (MOVWaddr [off2-off1] {sym} ptr) + for { + off1 := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpARMMOVWaddr { + break + } + off2 := v_0.AuxInt + sym := v_0.Aux + ptr := v_0.Args[0] + v.reset(OpARMMOVWaddr) + v.AuxInt = off2 - off1 + v.Aux = sym + v.AddArg(ptr) + return true + } // match: (SUBconst [0] x) // cond: // result: x @@ -11950,6 +12248,20 @@ func rewriteValueARM_OpARMSUBconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (SUBconst [c] x) + // cond: !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) + // result: (ADDconst [int64(int32(-c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(!isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c))) { + break + } + 
v.reset(OpARMADDconst) + v.AuxInt = int64(int32(-c)) + v.AddArg(x) + return true + } // match: (SUBconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [int64(int32(d-c))])