Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize slice bounds checking with SUB/SUBconst comparisons
author: Alexander Musman <alexander.musman@gmail.com>
Sat, 7 Jun 2025 10:18:58 +0000 (13:18 +0300)
committer: Gopher Robot <gobot@golang.org>
Thu, 24 Jul 2025 19:39:53 +0000 (12:39 -0700)
Optimize ARM64 code generation for slice bounds checking by recognizing
patterns where comparisons to zero involve SUB or SUBconst operations.
This change adds SSA opt rules to simplify:
 (CMPconst [0] (SUB x y)) => (CMP x y)

The optimizations apply to EQ, NE, ULE, and UGT comparisons, enabling
more efficient bounds checking for slice operations.

Code size improvement:
compile: .text:    9088004  ->  9065988 (-0.24%)
etcd:    .text:    10500276 -> 10497092 (-0.03%)
Change-Id: I467cb27674351652bcacc52b87e1f19677bd46a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/679915
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Keith Randall <khr@golang.org>

src/cmd/compile/internal/ssa/_gen/ARM64.rules
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/slices.go

index 01fe3a74f7d551b25afc870ad858755a766dfd05..bf99737c719131dfc1cb2f6b974221bff9992ba1 100644 (file)
 ((EQ|NE) (CMPconst  [0] x) yes no) => ((Z|NZ)   x yes no)
 ((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no)
 
+((ULE|UGT) (CMPconst  [0] x)) => ((EQ|NE) (CMPconst  [0] x))
+((ULE|UGT) (CMPWconst [0] x)) => ((EQ|NE) (CMPWconst [0] x))
+
+((Z|NZ)   sub:(SUB        x y)) && sub.Uses == 1 => ((EQ|NE) (CMP                x y))
+((ZW|NZW) sub:(SUB        x y)) && sub.Uses == 1 => ((EQ|NE) (CMPW               x y))
+((Z|NZ)   sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPconst         [c] y))
+((ZW|NZW) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPWconst [int32(c)] y))
+
 ((EQ|NE|LT|LE|GT|GE) (CMPconst  [0] z:(MADD a x y))  yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN  a (MUL  <x.Type> x y)) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPconst  [0] z:(MSUB a x y))  yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP  a (MUL  <x.Type> x y)) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW <x.Type> x y)) yes no)
index 792967c001ba84c03d4c66d60068b2731db374d4..19f69e5105df9bff1af0a3b736adf39a7a3dd98b 100644 (file)
@@ -25045,6 +25045,37 @@ func rewriteBlockARM64(b *Block) bool {
                        b.resetWithControl(BlockARM64FGE, cc)
                        return true
                }
+               // match: (NZ sub:(SUB x y))
+               // cond: sub.Uses == 1
+               // result: (NE (CMP x y))
+               for b.Controls[0].Op == OpARM64SUB {
+                       sub := b.Controls[0]
+                       y := sub.Args[1]
+                       x := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
+               // match: (NZ sub:(SUBconst [c] y))
+               // cond: sub.Uses == 1
+               // result: (NE (CMPconst [c] y))
+               for b.Controls[0].Op == OpARM64SUBconst {
+                       sub := b.Controls[0]
+                       c := auxIntToInt64(sub.AuxInt)
+                       y := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
+                       v0.AuxInt = int64ToAuxInt(c)
+                       v0.AddArg(y)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
                // match: (NZ (ANDconst [c] x) yes no)
                // cond: oneBit(c)
                // result: (TBNZ [int64(ntz64(c))] x yes no)
@@ -25083,6 +25114,37 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64NZW:
+               // match: (NZW sub:(SUB x y))
+               // cond: sub.Uses == 1
+               // result: (NE (CMPW x y))
+               for b.Controls[0].Op == OpARM64SUB {
+                       sub := b.Controls[0]
+                       y := sub.Args[1]
+                       x := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
+               // match: (NZW sub:(SUBconst [c] y))
+               // cond: sub.Uses == 1
+               // result: (NE (CMPWconst [int32(c)] y))
+               for b.Controls[0].Op == OpARM64SUBconst {
+                       sub := b.Controls[0]
+                       c := auxIntToInt64(sub.AuxInt)
+                       y := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
+                       v0.AuxInt = int32ToAuxInt(int32(c))
+                       v0.AddArg(y)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
                // match: (NZW (ANDconst [c] x) yes no)
                // cond: oneBit(int64(uint32(c)))
                // result: (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
@@ -25312,6 +25374,34 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64UGT:
+               // match: (UGT (CMPconst [0] x))
+               // result: (NE (CMPconst [0] x))
+               for b.Controls[0].Op == OpARM64CMPconst {
+                       v_0 := b.Controls[0]
+                       if auxIntToInt64(v_0.AuxInt) != 0 {
+                               break
+                       }
+                       x := v_0.Args[0]
+                       v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
+                       v0.AuxInt = int64ToAuxInt(0)
+                       v0.AddArg(x)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
+               // match: (UGT (CMPWconst [0] x))
+               // result: (NE (CMPWconst [0] x))
+               for b.Controls[0].Op == OpARM64CMPWconst {
+                       v_0 := b.Controls[0]
+                       if auxIntToInt32(v_0.AuxInt) != 0 {
+                               break
+                       }
+                       x := v_0.Args[0]
+                       v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
+                       v0.AuxInt = int32ToAuxInt(0)
+                       v0.AddArg(x)
+                       b.resetWithControl(BlockARM64NE, v0)
+                       return true
+               }
                // match: (UGT (FlagConstant [fc]) yes no)
                // cond: fc.ugt()
                // result: (First yes no)
@@ -25346,6 +25436,34 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64ULE:
+               // match: (ULE (CMPconst [0] x))
+               // result: (EQ (CMPconst [0] x))
+               for b.Controls[0].Op == OpARM64CMPconst {
+                       v_0 := b.Controls[0]
+                       if auxIntToInt64(v_0.AuxInt) != 0 {
+                               break
+                       }
+                       x := v_0.Args[0]
+                       v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
+                       v0.AuxInt = int64ToAuxInt(0)
+                       v0.AddArg(x)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
+               // match: (ULE (CMPWconst [0] x))
+               // result: (EQ (CMPWconst [0] x))
+               for b.Controls[0].Op == OpARM64CMPWconst {
+                       v_0 := b.Controls[0]
+                       if auxIntToInt32(v_0.AuxInt) != 0 {
+                               break
+                       }
+                       x := v_0.Args[0]
+                       v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
+                       v0.AuxInt = int32ToAuxInt(0)
+                       v0.AddArg(x)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
                // match: (ULE (FlagConstant [fc]) yes no)
                // cond: fc.ule()
                // result: (First yes no)
@@ -25414,6 +25532,37 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64Z:
+               // match: (Z sub:(SUB x y))
+               // cond: sub.Uses == 1
+               // result: (EQ (CMP x y))
+               for b.Controls[0].Op == OpARM64SUB {
+                       sub := b.Controls[0]
+                       y := sub.Args[1]
+                       x := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
+               // match: (Z sub:(SUBconst [c] y))
+               // cond: sub.Uses == 1
+               // result: (EQ (CMPconst [c] y))
+               for b.Controls[0].Op == OpARM64SUBconst {
+                       sub := b.Controls[0]
+                       c := auxIntToInt64(sub.AuxInt)
+                       y := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
+                       v0.AuxInt = int64ToAuxInt(c)
+                       v0.AddArg(y)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
                // match: (Z (ANDconst [c] x) yes no)
                // cond: oneBit(c)
                // result: (TBZ [int64(ntz64(c))] x yes no)
@@ -25452,6 +25601,37 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64ZW:
+               // match: (ZW sub:(SUB x y))
+               // cond: sub.Uses == 1
+               // result: (EQ (CMPW x y))
+               for b.Controls[0].Op == OpARM64SUB {
+                       sub := b.Controls[0]
+                       y := sub.Args[1]
+                       x := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg2(x, y)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
+               // match: (ZW sub:(SUBconst [c] y))
+               // cond: sub.Uses == 1
+               // result: (EQ (CMPWconst [int32(c)] y))
+               for b.Controls[0].Op == OpARM64SUBconst {
+                       sub := b.Controls[0]
+                       c := auxIntToInt64(sub.AuxInt)
+                       y := sub.Args[0]
+                       if !(sub.Uses == 1) {
+                               break
+                       }
+                       v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
+                       v0.AuxInt = int32ToAuxInt(int32(c))
+                       v0.AddArg(y)
+                       b.resetWithControl(BlockARM64EQ, v0)
+                       return true
+               }
                // match: (ZW (ANDconst [c] x) yes no)
                // cond: oneBit(int64(uint32(c)))
                // result: (TBZ [int64(ntz64(int64(uint32(c))))] x yes no)
index 9e8990c586efa67191a493315224d9cc8eaf86dd..30a131a5a5d97678d24449d444db3cc6638c07a6 100644 (file)
@@ -429,3 +429,21 @@ func Slice0(p *struct{}, i int) []struct{} {
        // amd64:-"MULQ"
        return unsafe.Slice(p, i)
 }
+
+// --------------------------------------- //
+//   Code generation for slice bounds      //
+//   checking comparison                   //
+// --------------------------------------- //
+
+func SlicePut(a []byte, c uint8) []byte {
+       // arm64:`CBZ\tR1`
+       a[0] = c
+       // arm64:`CMP\t\$1, R1`
+       a = a[1:]
+       a[0] = c
+       // arm64:`CMP\t\$2, R1`
+       a = a[1:]
+       a[0] = c
+       a = a[1:]
+       return a
+}