From 1662d552474f9811589b9abfc02d923b3b94d787 Mon Sep 17 00:00:00 2001
From: Jorropo
Date: Mon, 27 Oct 2025 18:20:44 +0100
Subject: [PATCH] cmd/compile: do not Zext bools to 64bits in amd64 CMOV generation rules

Change-Id: I77b714ed767e50d13183f4307f65e47ca7577f9f
Reviewed-on: https://go-review.googlesource.com/c/go/+/715380
Reviewed-by: Keith Randall
Reviewed-by: Michael Knyszek
Auto-Submit: Jorropo
Reviewed-by: Keith Randall
LUCI-TryBot-Result: Go LUCI
---
 src/cmd/compile/internal/ssa/_gen/AMD64.rules |  26 ++-
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 175 ++++++++++++++----
 2 files changed, 159 insertions(+), 42 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 0bea99e38d..b9af262334 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -397,20 +397,30 @@
 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
   => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
 
-// If the condition does not set the flags, we need to generate a comparison.
-(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
-  => (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
-(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
-  => (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
-(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
-  => (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
-
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
   => (CMOVQNE y x (CMPQconst [0] check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
   => (CMOVLNE y x (CMPQconst [0] check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
   => (CMOVWNE y x (CMPQconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t))
+  => (CMOVQNE y x (CMPLconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t)
+  => (CMOVLNE y x (CMPLconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t)
+  => (CMOVWNE y x (CMPLconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t))
+  => (CMOVQNE y x (CMPWconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t)
+  => (CMOVLNE y x (CMPWconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t)
+  => (CMOVWNE y x (CMPWconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t))
+  => (CMOVQNE y x (CMPBconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t)
+  => (CMOVLNE y x (CMPBconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t)
+  => (CMOVWNE y x (CMPBconst [0] check))
 
 // Absorb InvertFlags
 (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index e702925f5f..5982ad611a 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -24380,7 +24380,6 @@ func rewriteValueAMD64_OpCondSelect(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	typ := &b.Func.Config.Types
 	// match: (CondSelect <t> x y (SETEQ cond))
 	// cond: (is64BitInt(t) || isPtr(t))
 	// result: (CMOVQEQ y x cond)
@@ -25138,113 +25137,221 @@ func rewriteValueAMD64_OpCondSelect(v *Value) bool {
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 1
-	// result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+	// result: (CMOVQNE y x (CMPQconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
 			break
 		}
-		v.reset(OpCondSelect)
-		v.Type = t
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
+		v.reset(OpAMD64CMOVQNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
-		v.AddArg3(x, y, v0)
+		v.AddArg3(y, x, v0)
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 2
-	// result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+	// result: (CMOVLNE y x (CMPQconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
 			break
 		}
-		v.reset(OpCondSelect)
-		v.Type = t
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
+		v.reset(OpAMD64CMOVLNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
-		v.AddArg3(x, y, v0)
+		v.AddArg3(y, x, v0)
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 4
-	// result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+	// result: (CMOVWNE y x (CMPQconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
 			break
 		}
-		v.reset(OpCondSelect)
-		v.Type = t
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+		v.reset(OpAMD64CMOVWNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
-		v.AddArg3(x, y, v0)
+		v.AddArg3(y, x, v0)
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-	// result: (CMOVQNE y x (CMPQconst [0] check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t))
+	// result: (CMOVQNE y x (CMPLconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t))) {
 			break
 		}
 		v.reset(OpAMD64CMOVQNE)
-		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
 		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
 		v.AddArg3(y, x, v0)
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-	// result: (CMOVLNE y x (CMPQconst [0] check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t)
+	// result: (CMOVLNE y x (CMPLconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t)) {
 			break
 		}
 		v.reset(OpAMD64CMOVLNE)
-		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
 		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
 		v.AddArg3(y, x, v0)
 		return true
 	}
 	// match: (CondSelect <t> x y check)
-	// cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-	// result: (CMOVWNE y x (CMPQconst [0] check))
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t)
+	// result: (CMOVWNE y x (CMPLconst [0] check))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		check := v_2
-		if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
+		if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t)) {
 			break
 		}
 		v.reset(OpAMD64CMOVWNE)
-		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
 		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(check)
 		v.AddArg3(y, x, v0)
 		return true
 	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t))
+	// result: (CMOVQNE y x (CMPWconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t))) {
+			break
+		}
+		v.reset(OpAMD64CMOVQNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+		v0.AuxInt = int16ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t)
+	// result: (CMOVLNE y x (CMPWconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t)) {
+			break
+		}
+		v.reset(OpAMD64CMOVLNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+		v0.AuxInt = int16ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t)
+	// result: (CMOVWNE y x (CMPWconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t)) {
+			break
+		}
+		v.reset(OpAMD64CMOVWNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+		v0.AuxInt = int16ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t))
+	// result: (CMOVQNE y x (CMPBconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t))) {
+			break
+		}
+		v.reset(OpAMD64CMOVQNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+		v0.AuxInt = int8ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t)
+	// result: (CMOVLNE y x (CMPBconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t)) {
+			break
+		}
+		v.reset(OpAMD64CMOVLNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+		v0.AuxInt = int8ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
+	// match: (CondSelect <t> x y check)
+	// cond: !check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t)
+	// result: (CMOVWNE y x (CMPBconst [0] check))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		check := v_2
+		if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t)) {
+			break
+		}
+		v.reset(OpAMD64CMOVWNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+		v0.AuxInt = int8ToAuxInt(0)
+		v0.AddArg(check)
+		v.AddArg3(y, x, v0)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpConst16(v *Value) bool {
-- 
2.52.0
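For context, a minimal sketch of the kind of source this lowering affects; the function below and the described codegen are illustrative assumptions, not output or tests from this CL. When the check value of a CondSelect is a bool (a 1-byte value) rather than a flags-producing comparison, the old rules first widened it with MOVBQZX and then tested the 64-bit result against zero; with the width-specific rules above, the byte is compared directly with CMPBconst, and the zero-extension disappears.

// Package main holds a hypothetical example (not part of this CL) of code
// whose lowering goes through the CondSelect rules changed above.
package main

// pick returns x or y depending on c. On amd64 the compiler's branch
// elimination may rewrite the if/else into a CondSelect with the bool c as
// its check value. Under the old rules that 1-byte check was zero-extended
// before the test, i.e. (CMPQconst [0] (MOVBQZX check)); under the new
// size==1 rules it is compared at its own width, (CMPBconst [0] check).
//
//go:noinline
func pick(c bool, x, y int64) int64 {
	if c {
		return x
	}
	return y
}

func main() {
	println(pick(true, 1, 2), pick(false, 1, 2))
}

Assuming the conditional move is generated at all (e.g. when inspecting pick with go build -gcflags=-S), the difference would be expected to show up as one fewer zero-extension instruction per lowered CondSelect whose check is narrower than 8 bytes.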