From: Alexander Musman
Date: Sat, 7 Jun 2025 10:18:58 +0000 (+0300)
Subject: cmd/compile: optimize slice bounds checking with SUB/SUBconst comparisons
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=dcb479c2f9e6c379ee01efb3b1fa8a4e784f8503;p=gostls13.git

cmd/compile: optimize slice bounds checking with SUB/SUBconst comparisons

Optimize ARM64 code generation for slice bounds checking by recognizing
patterns where comparisons to zero involve SUB or SUBconst operations.
This change adds SSA opt rules to simplify:

	(CMPconst [0] (SUB x y)) => (CMP x y)

The optimizations apply to EQ, NE, ULE, and UGT comparisons, enabling
more efficient bounds checking for slice operations.

Code size improvement:

	compile: .text: 9088004 -> 9065988 (-0.24%)
	etcd:    .text: 10500276 -> 10497092 (-0.03%)

Change-Id: I467cb27674351652bcacc52b87e1f19677bd46a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/679915
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
Reviewed-by: Keith Randall
Reviewed-by: Michael Knyszek
Auto-Submit: Keith Randall
---

diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 01fe3a74f7..bf99737c71 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -683,6 +683,14 @@
 ((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no)
 ((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no)
 
+((ULE|UGT) (CMPconst [0] x)) => ((EQ|NE) (CMPconst [0] x))
+((ULE|UGT) (CMPWconst [0] x)) => ((EQ|NE) (CMPWconst [0] x))
+
+((Z|NZ) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMP x y))
+((ZW|NZW) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMPW x y))
+((Z|NZ) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPconst [c] y))
+((ZW|NZW) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPWconst [int32(c)] y))
+
 ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL x y)) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL x y)) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW x y)) yes no)
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 792967c001..19f69e5105 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -25045,6 +25045,37 @@ func rewriteBlockARM64(b *Block) bool {
 			b.resetWithControl(BlockARM64FGE, cc)
 			return true
 		}
+		// match: (NZ sub:(SUB x y))
+		// cond: sub.Uses == 1
+		// result: (NE (CMP x y))
+		for b.Controls[0].Op == OpARM64SUB {
+			sub := b.Controls[0]
+			y := sub.Args[1]
+			x := sub.Args[0]
+			if !(sub.Uses == 1) {
+				break
+			}
+			v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
+			v0.AddArg2(x, y)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
+		// match: (NZ sub:(SUBconst [c] y))
+		// cond: sub.Uses == 1
+		// result: (NE (CMPconst [c] y))
+		for b.Controls[0].Op == OpARM64SUBconst {
+			sub := b.Controls[0]
+			c := auxIntToInt64(sub.AuxInt)
+			y := sub.Args[0]
+			if !(sub.Uses == 1) {
+				break
+			}
+			v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
+			v0.AuxInt = int64ToAuxInt(c)
+			v0.AddArg(y)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
 		// match: (NZ (ANDconst [c] x) yes no)
 		// cond: oneBit(c)
 		// result: (TBNZ [int64(ntz64(c))] x yes no)
@@ -25083,6 +25114,37 @@ func rewriteBlockARM64(b *Block) bool {
 			return true
 		}
 	case BlockARM64NZW:
+		// match: (NZW sub:(SUB x y))
+		// cond: sub.Uses == 1
+		// result: (NE (CMPW x y))
+		for b.Controls[0].Op == OpARM64SUB {
+			sub := b.Controls[0]
+			y := sub.Args[1]
+			x := sub.Args[0]
+			if !(sub.Uses == 1) {
+				break
+			}
+			v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
+			v0.AddArg2(x, y)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
+		// match: (NZW sub:(SUBconst [c] y))
+		// cond: sub.Uses == 1
+		// result: (NE (CMPWconst [int32(c)] y))
+		for b.Controls[0].Op == OpARM64SUBconst {
+			sub := b.Controls[0]
+			c := auxIntToInt64(sub.AuxInt)
+			y := sub.Args[0]
+			if !(sub.Uses == 1) {
+				break
+			}
+			v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
+			v0.AuxInt = int32ToAuxInt(int32(c))
+			v0.AddArg(y)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
 		// match: (NZW (ANDconst [c] x) yes no)
 		// cond: oneBit(int64(uint32(c)))
 		// result: (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
@@ -25312,6 +25374,34 @@ func rewriteBlockARM64(b *Block) bool {
 			return true
 		}
 	case BlockARM64UGT:
+		// match: (UGT (CMPconst [0] x))
+		// result: (NE (CMPconst [0] x))
+		for b.Controls[0].Op == OpARM64CMPconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt64(v_0.AuxInt) != 0 {
+				break
+			}
+			x := v_0.Args[0]
+			v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
+			v0.AuxInt = int64ToAuxInt(0)
+			v0.AddArg(x)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
+		// match: (UGT (CMPWconst [0] x))
+		// result: (NE (CMPWconst [0] x))
+		for b.Controls[0].Op == OpARM64CMPWconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt32(v_0.AuxInt) != 0 {
+				break
+			}
+			x := v_0.Args[0]
+			v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
+			v0.AuxInt = int32ToAuxInt(0)
+			v0.AddArg(x)
+			b.resetWithControl(BlockARM64NE, v0)
+			return true
+		}
 		// match: (UGT (FlagConstant [fc]) yes no)
 		// cond: fc.ugt()
 		// result: (First yes no)
@@ -25346,6 +25436,34 @@ func rewriteBlockARM64(b *Block) bool {
 			return true
 		}
 	case BlockARM64ULE:
+		// match: (ULE (CMPconst [0] x))
+		// result: (EQ (CMPconst [0] x))
+		for b.Controls[0].Op == OpARM64CMPconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt64(v_0.AuxInt) != 0 {
+				break
+			}
+			x := v_0.Args[0]
+			v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
+			v0.AuxInt = int64ToAuxInt(0)
+			v0.AddArg(x)
+			b.resetWithControl(BlockARM64EQ, v0)
+			return true
+		}
+		// match: (ULE (CMPWconst [0] x))
+		// result: (EQ (CMPWconst [0] x))
+		for b.Controls[0].Op == OpARM64CMPWconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt32(v_0.AuxInt) != 0 {
+				break
+			}
+			x := v_0.Args[0]
+			v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
+			v0.AuxInt = int32ToAuxInt(0)
+			v0.AddArg(x)
+			b.resetWithControl(BlockARM64EQ, v0)
+			return true
+		}
 		// match: (ULE (FlagConstant [fc]) yes no)
 		// cond: fc.ule()
 		// result: (First yes no)
@@ -25414,6 +25532,37 @@ func rewriteBlockARM64(b *Block) bool {
 			return true
 		}
 	case BlockARM64Z:
+		// match: (Z sub:(SUB x y))
+		// cond: sub.Uses == 1
+		// result: (EQ (CMP x y))
+		for b.Controls[0].Op == OpARM64SUB {
+			sub := b.Controls[0]
+			y := sub.Args[1]
+			x := sub.Args[0]
+			if !(sub.Uses == 1) {
+				break
+			}
+			v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
+			v0.AddArg2(x, y)
+			b.resetWithControl(BlockARM64EQ, v0)
+			return true
+		}
+		// match: (Z sub:(SUBconst [c] y))
+		// cond: sub.Uses == 1
+		// result: (EQ (CMPconst [c] y))
+		for b.Controls[0].Op == OpARM64SUBconst {
+			sub := b.Controls[0]
+			c := auxIntToInt64(sub.AuxInt)
+			y := sub.Args[0]
+			if !(sub.Uses == 1) {
+ break + } + v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags) + v0.AuxInt = int64ToAuxInt(c) + v0.AddArg(y) + b.resetWithControl(BlockARM64EQ, v0) + return true + } // match: (Z (ANDconst [c] x) yes no) // cond: oneBit(c) // result: (TBZ [int64(ntz64(c))] x yes no) @@ -25452,6 +25601,37 @@ func rewriteBlockARM64(b *Block) bool { return true } case BlockARM64ZW: + // match: (ZW sub:(SUB x y)) + // cond: sub.Uses == 1 + // result: (EQ (CMPW x y)) + for b.Controls[0].Op == OpARM64SUB { + sub := b.Controls[0] + y := sub.Args[1] + x := sub.Args[0] + if !(sub.Uses == 1) { + break + } + v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockARM64EQ, v0) + return true + } + // match: (ZW sub:(SUBconst [c] y)) + // cond: sub.Uses == 1 + // result: (EQ (CMPWconst [int32(c)] y)) + for b.Controls[0].Op == OpARM64SUBconst { + sub := b.Controls[0] + c := auxIntToInt64(sub.AuxInt) + y := sub.Args[0] + if !(sub.Uses == 1) { + break + } + v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(int32(c)) + v0.AddArg(y) + b.resetWithControl(BlockARM64EQ, v0) + return true + } // match: (ZW (ANDconst [c] x) yes no) // cond: oneBit(int64(uint32(c))) // result: (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) diff --git a/test/codegen/slices.go b/test/codegen/slices.go index 9e8990c586..30a131a5a5 100644 --- a/test/codegen/slices.go +++ b/test/codegen/slices.go @@ -429,3 +429,21 @@ func Slice0(p *struct{}, i int) []struct{} { // amd64:-"MULQ" return unsafe.Slice(p, i) } + +// --------------------------------------- // +// Code generation for slice bounds // +// checking comparison // +// --------------------------------------- // + +func SlicePut(a []byte, c uint8) []byte { + // arm64:`CBZ\tR1` + a[0] = c + // arm64:`CMP\t\$1, R1` + a = a[1:] + a[0] = c + // arm64:`CMP\t\$2, R1` + a = a[1:] + a[0] = c + a = a[1:] + return a +}