From bd80f74bc154585237a3c1b636e30dab6d781923 Mon Sep 17 00:00:00 2001 From: Alexander Musman Date: Sat, 7 Jun 2025 12:00:11 +0300 Subject: [PATCH] cmd/compile: fold shift through AND for slice operations Fold a shift through AND when the AND gets a zero-or-all-ones operand (e.g. from arithmetic shift by 63 of a 64-bit value) for a common case with slice operations: ASR $63, R2, R2 AND R3<<3, R2, R2 ADD R2, R0, R2 As the operands are 64-bit, we can transform it to: AND R2->63, R3, R2 ADD R2<<3, R0, R2 Code size improvement: compile: .text: 9088004 -> 9086292 (-0.02%) etcd: .text: 10500276 -> 10498964 (-0.01%) Change-Id: Ibcd5e67173da39b77ceff77ca67812fb8be5a7b5 Reviewed-on: https://go-review.googlesource.com/c/go/+/679895 Reviewed-by: Keith Randall Reviewed-by: Mark Freeman Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Knyszek --- src/cmd/compile/internal/ssa/_gen/ARM64.rules | 4 ++ src/cmd/compile/internal/ssa/rewriteARM64.go | 60 +++++++++++++++++++ test/codegen/slices.go | 9 +++ 3 files changed, 73 insertions(+) diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index bf99737c71..62f2c35235 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -1666,6 +1666,10 @@ (SRLconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVDconst [0]) (SRLconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVDconst [0]) +// Special cases for slice operations +(ADD x0 x1:(ANDshiftRA x2:(SLLconst [sl] y) z [63])) && x1.Uses == 1 && x2.Uses == 1 => (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl]) +(ADD x0 x1:(ANDshiftLL x2:(SRAconst [63] z) y [sl])) && x1.Uses == 1 && x2.Uses == 1 => (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl]) + // bitfield ops // sbfiz diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 19f69e5105..eb3787829c 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ 
b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -1592,6 +1592,66 @@ func rewriteValueARM64_OpARM64ADD(v *Value) bool { } break } + // match: (ADD x0 x1:(ANDshiftRA x2:(SLLconst [sl] y) z [63])) + // cond: x1.Uses == 1 && x2.Uses == 1 + // result: (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl]) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + x1 := v_1 + if x1.Op != OpARM64ANDshiftRA || auxIntToInt64(x1.AuxInt) != 63 { + continue + } + z := x1.Args[1] + x2 := x1.Args[0] + if x2.Op != OpARM64SLLconst { + continue + } + sl := auxIntToInt64(x2.AuxInt) + y := x2.Args[0] + if !(x1.Uses == 1 && x2.Uses == 1) { + continue + } + v.reset(OpARM64ADDshiftLL) + v.AuxInt = int64ToAuxInt(sl) + v0 := b.NewValue0(v.Pos, OpARM64ANDshiftRA, y.Type) + v0.AuxInt = int64ToAuxInt(63) + v0.AddArg2(y, z) + v.AddArg2(x0, v0) + return true + } + break + } + // match: (ADD x0 x1:(ANDshiftLL x2:(SRAconst [63] z) y [sl])) + // cond: x1.Uses == 1 && x2.Uses == 1 + // result: (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl]) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + x1 := v_1 + if x1.Op != OpARM64ANDshiftLL { + continue + } + sl := auxIntToInt64(x1.AuxInt) + y := x1.Args[1] + x2 := x1.Args[0] + if x2.Op != OpARM64SRAconst || auxIntToInt64(x2.AuxInt) != 63 { + continue + } + z := x2.Args[0] + if !(x1.Uses == 1 && x2.Uses == 1) { + continue + } + v.reset(OpARM64ADDshiftLL) + v.AuxInt = int64ToAuxInt(sl) + v0 := b.NewValue0(v.Pos, OpARM64ANDshiftRA, y.Type) + v0.AuxInt = int64ToAuxInt(63) + v0.AddArg2(y, z) + v.AddArg2(x0, v0) + return true + } + break + } return false } func rewriteValueARM64_OpARM64ADDSflags(v *Value) bool { diff --git a/test/codegen/slices.go b/test/codegen/slices.go index 30a131a5a5..1d918a3a0a 100644 --- a/test/codegen/slices.go +++ b/test/codegen/slices.go @@ -417,6 +417,15 @@ func SliceWithSubtractBound(a []int, b int) []int { return a[(3 - b):] } +// --------------------------------------- // +// ARM64 folding 
for slice masks // +// --------------------------------------- // + +func SliceAndIndex(a []int, b int) int { + // arm64:"AND\tR[0-9]+->63","ADD\tR[0-9]+<<3" + return a[b:][b] +} + // --------------------------------------- // // Code generation for unsafe.Slice // // --------------------------------------- // -- 2.51.0