Ensure that any comparison between two values uses a consistent
argument order. This makes equivalent comparisons syntactically
identical, so that they can be eliminated during the lowered CSE
pass. That will be particularly important if we eliminate the
Greater and Geq ops (see #37316).
Example:

  Before canonicalization:

    CMP R0, R1
    BLT L1
    CMP R1, R0 // different order, cannot eliminate
    BEQ L2

  After canonicalization:

    CMP R0, R1
    BLT L1
    CMP R0, R1 // same order, can eliminate
    BEQ L2
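A minimal Go sketch of source that can produce this shape (a
hypothetical example, not taken from this CL): both conditions compare
the same pair of values, but without canonicalization the second
comparison may be emitted with its operands in the opposite order,
blocking CSE of the two CMP instructions.

  package main

  // least is illustrative only: the two conditions below compare the
  // same pair of values, but the second may lower to a CMP with its
  // operands swapped relative to the first.
  func least(x, y int64) int64 {
  	if x < y { // CMP x, y; branch if less than
  		return x
  	}
  	if y == x { // may lower to CMP y, x: same comparison, swapped operands
  		return y
  	}
  	return y
  }

  func main() {
  	_ = least(1, 2)
  }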
This does have some drawbacks. Notably, comparisons might 'flip'
direction in the assembly output after even small changes to the
code or compiler. On balance, however, it should make optimizations
more reliable.
compilecmp master -> HEAD
master (218f4572f5): text/template: make reflect.Value indirections more robust
HEAD (f1661fef3e): cmd/compile: canonicalize comparison argument order
platform: linux/amd64

file       before      after       Δ       %
api        6063927     6068023     +4096   +0.068%
asm        5191757     5183565     -8192   -0.158%
cgo        4893518     4901710     +8192   +0.167%
cover      5330345     5326249     -4096   -0.077%
fix        3417778     3421874     +4096   +0.120%
pprof      14889456    14885360    -4096   -0.028%
test2json  2848138     2844042     -4096   -0.144%
trace      11746239    11733951    -12288  -0.105%
total      132739173   132722789   -16384  -0.012%
Change-Id: I11736b3fe2a4553f6fc65018f475e88217fa22f9
Reviewed-on: https://go-review.googlesource.com/c/go/+/220425
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP(L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(L|W|B) y x))
+
// strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
// 1 - addl, shll, leal, negl, subl
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP(Q|L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(Q|L|W|B) y x))
+
// Using MOVZX instead of AND is cheaper.
(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
(TST x (MOVWconst [c])) -> (TSTconst [c] x)
(TEQ x (MOVWconst [c])) -> (TEQconst [c] x)
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP x y) && x.ID > y.ID -> (InvertFlags (CMP y x))
+
// don't extend after proper load
// MOVWreg instruction is not emitted if src and dst registers are same, but it ensures the type.
(MOVBreg x:(MOVBload _ _)) -> (MOVWreg x)
(CMPW x (MOVDconst [c])) -> (CMPWconst [int64(int32(c))] x)
(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst [int64(int32(c))] x))
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW) y x))
+
// mul-neg -> mneg
(NEG (MUL x y)) -> (MNEG x y)
(NEG (MULW x y)) -> (MNEGW x y)
(CMPWU x (MOVDconst [c])) && isU16Bit(c) -> (CMPWUconst x [c])
(CMPWU (MOVDconst [c]) y) && isU16Bit(c) -> (InvertFlags (CMPWUconst y [c]))
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
+
// ISEL auxInt values 0=LT 1=GT 2=EQ arg2 ? arg0 : arg1
// ISEL auxInt values 4=GE 5=LE 6=NE arg2 ? arg1 : arg0
// ISELB special case where arg0, arg1 values are 0, 1
(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
+
// Using MOV{W,H,B}Zreg instead of AND is cheaper.
(AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
(AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
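InvertFlags itself emits no instruction: later rewrite rules fold it
into whatever consumes the flags by swapping the condition code. A
minimal sketch of that swap, assuming the usual pairing of condition
codes (illustrative only, not compiler code; the real folding rules
live in each architecture's rules file):

  package main

  import "fmt"

  // invertCondition shows which condition codes swap when the operands
  // of the comparison that produced the flags are exchanged. Equality
  // and inequality are symmetric, so they are unchanged.
  func invertCondition(cc string) string {
  	switch cc {
  	case "LT":
  		return "GT"
  	case "GT":
  		return "LT"
  	case "LE":
  		return "GE"
  	case "GE":
  		return "LE"
  	default: // EQ, NE
  		return cc
  	}
  }

  func main() {
  	fmt.Println(invertCondition("LT")) // GT
  }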
v.AddArg(v0)
return true
}
+ // match: (CMPB x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPB y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(Op386InvertFlags)
+ v0 := b.NewValue0(v.Pos, Op386CMPB, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPBload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPL x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPL y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(Op386InvertFlags)
+ v0 := b.NewValue0(v.Pos, Op386CMPL, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPLload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPW x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPW y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(Op386InvertFlags)
+ v0 := b.NewValue0(v.Pos, Op386CMPW, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPWload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPB x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPB y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpAMD64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPBload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPL x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPL y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpAMD64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPLload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPQ x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPQ y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpAMD64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPQload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMPW x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPW y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpAMD64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPWload {sym} [off] ptr x mem)
v.AddArg(v0)
return true
}
+ // match: (CMP x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMP y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpARMInvertFlags)
+ v0 := b.NewValue0(v.Pos, OpARMCMP, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMP x (SLLconst [c] y))
// result: (CMPshiftLL x y [c])
for {
v.AddArg(v0)
return true
}
+ // match: (CMP x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMP y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpARM64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMP x0 x1:(SLLconst [c] y))
// cond: clobberIfDead(x1)
// result: (CMPshiftLL x0 y [c])
v.AddArg(v0)
return true
}
+ // match: (CMPW x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPW y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpARM64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64CMPWconst(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMP x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMP y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpPPC64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpPPC64CMP, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMPU x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPU y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpPPC64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValuePPC64_OpPPC64CMPUconst(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMPW x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPW y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpPPC64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpPPC64CMPW, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMPWU x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPWU y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpPPC64InvertFlags)
+ v0 := b.NewValue0(v.Pos, OpPPC64CMPWU, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValuePPC64_OpPPC64CMPWUconst(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMP x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMP y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpS390XInvertFlags)
+ v0 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValueS390X_OpS390XCMPU(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMPU x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPU y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpS390XInvertFlags)
+ v0 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
return false
}
func rewriteValueS390X_OpS390XCMPUconst(v *Value) bool {
v.AddArg(v0)
return true
}
+ // match: (CMPW x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPW y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpS390XInvertFlags)
+ v0 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPW x (MOVWreg y))
// result: (CMPW x y)
for {
v.AddArg(v0)
return true
}
+ // match: (CMPWU x y)
+ // cond: x.ID > y.ID
+ // result: (InvertFlags (CMPWU y x))
+ for {
+ x := v_0
+ y := v_1
+ if !(x.ID > y.ID) {
+ break
+ }
+ v.reset(OpS390XInvertFlags)
+ v0 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
+ v0.AddArg(y)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (CMPWU x (MOVWreg y))
// result: (CMPWU x y)
for {
// Signed 64-bit compare-and-branch.
func si64(x, y chan int64) {
- // s390x:"CGRJ\t[$]4, R[0-9]+, R[0-9]+, "
+ // s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// Unsigned 64-bit compare-and-branch.
func ui64(x, y chan uint64) {
- // s390x:"CLGRJ\t[$]2, R[0-9]+, R[0-9]+, "
+ // s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
// Signed 32-bit compare-and-branch.
func si32(x, y chan int32) {
- // s390x:"CRJ\t[$]4, R[0-9]+, R[0-9]+, "
+ // s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// Unsigned 32-bit compare-and-branch.
func ui32(x, y chan uint32) {
- // s390x:"CLRJ\t[$]2, R[0-9]+, R[0-9]+, "
+ // s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
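These regex changes are the 'flip' drawback from the commit message
showing up in practice. Assuming the standard s390x compare-and-branch
mask encoding (8 = equal, 4 = low, 2 = high), swapping the compared
operands exchanges masks 2 and 4 while leaving 8 alone, so the tests
must accept either. A small sketch of that relationship:

  package main

  import "fmt"

  // swappedMask is illustrative only (assumes the s390x mask encoding
  // above). Exchanging the compared operands turns branch-if-low into
  // branch-if-high and vice versa; branch-if-equal is unaffected.
  func swappedMask(mask int) int {
  	switch mask {
  	case 2:
  		return 4
  	case 4:
  		return 2
  	}
  	return mask
  }

  func main() {
  	fmt.Println(swappedMask(4)) // 2: the other mask the tests now accept
  }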
x = -y
}
// amd64:"CMOVQCS"
- // arm64:"CSEL\tLO"
+ // arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}
x = -y
}
// amd64:"CMOVLCS"
- // arm64:"CSEL\tLO"
+ // arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}
x = -y
}
// amd64:"CMOVWCS"
- // arm64:"CSEL\tLO"
+ // arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}
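The arm64 changes are analogous: LO (unsigned lower) and HI (unsigned
higher) are operand-swapped counterparts, so once comparison arguments
are canonicalized either condition may appear in the generated CSEL.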