From: Keith Randall Date: Tue, 4 Nov 2025 23:15:16 +0000 (-0800) Subject: cmd/internal/obj/arm64: shorten constant integer loads X-Git-Tag: go1.26rc1~340 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3b3d6b9e5d9898810ee13e739f3ad759ab104fdb;p=gostls13.git cmd/internal/obj/arm64: shorten constant integer loads Large integer constants can take up to 4 instructions to encode. We can encode some large constants with a single instruction, namely those which are bit patterns (repetitions of certain runs of 0s and 1s). Often the constants we want to encode are *close* to those bit patterns, but don't exactly match. For those, we can use 2 instructions, one to load the close-by bit pattern and one to fix up any mismatches. The constants we use to strength reduce divides often fit this pattern. For unsigned divides by 1 through 15, this CL applies to the constant for N=3,5,6,10,12,15. Triggers 17 times in hello world. Change-Id: I623abf32961fb3e74d0a163f6822f0647cd94499 Reviewed-on: https://go-review.googlesource.com/c/go/+/717900 Auto-Submit: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Cherry Mui --- diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index ae10f347bb..773380e9bb 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -400,6 +400,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2 MOVD $0xaaaa0000aaaa1111, R1 // MOVD $-6149102338357718767, R1 // 212282d24155b5f24155f5f2 MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2 + MOVD $0xaaaaaaaaaaaaaaab, R1 // MOVD $-6148914691236517205, R1 // e1f301b2615595f2 + MOVD $0x0ff019940ff00ff0, R1 // MOVD $1148446028692721648, R1 // e19f0cb28132c3f2 MOVD $0, R1 // e1031faa MOVD $-1, R1 // 01008092 MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 diff --git 
a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7e7f028bfb..ccf8eda495 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -38,6 +38,7 @@ import ( "fmt" "log" "math" + "math/bits" "slices" "strings" ) @@ -1976,7 +1977,18 @@ func (c *ctxt7) con64class(a *obj.Addr) int { return C_MOVCON } else if zeroCount == 2 || negCount == 2 { return C_MOVCON2 - } else if zeroCount == 1 || negCount == 1 { + } + // See omovlconst for description of this loop. + for i := 0; i < 4; i++ { + mask := uint64(0xffff) << (i * 16) + for period := 2; period <= 32; period *= 2 { + x := uint64(a.Offset)&^mask | bits.RotateLeft64(uint64(a.Offset), max(period, 16))&mask + if isbitcon(x) { + return C_MOVCON2 + } + } + } + if zeroCount == 1 || negCount == 1 { return C_MOVCON3 } else { return C_VCON @@ -7555,6 +7567,31 @@ func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uin } } return 2 + } + + // Look for a two instruction pair, a bit pattern encodeable + // as a bitcon immediate plus a fixup MOVK instruction. + // Constants like this often occur from strength reduction of divides. + for i = 0; i < 4; i++ { + mask := uint64(0xffff) << (i * 16) + for period := 2; period <= 32; period *= 2 { // TODO: handle period==64 somehow? + // Copy in bits from outside of the masked region + x := uint64(d)&^mask | bits.RotateLeft64(uint64(d), max(period, 16))&mask + if isbitcon(x) { + // ORR $c1, ZR, rt + os[0] = c.opirr(p, AORR) + os[0] |= bitconEncode(x, 64) | uint32(REGZERO&31)<<5 | uint32(rt&31) + // MOVK $c2<<(i*16), rt + os[1] = c.opirr(p, AMOVK) + os[1] |= MOVCONST(d, i, rt) + return 2 + } + } + } + // TODO: other fixups, like ADD or SUB? + // TODO: 3-instruction variant, instead of the full MOVD+3*MOVK version below? 
+ + switch { case zeroCount == 1: // one MOVZ and two MOVKs diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go index 83d137a084..b83db60b40 100644 --- a/src/cmd/internal/obj/arm64/asm_arm64_test.go +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go @@ -38,3 +38,16 @@ func TestMOVK(t *testing.T) { t.Errorf("Got %x want %x\n", x, want) } } + +func testCombined() (a uint64, b uint64) +func TestCombined(t *testing.T) { + got1, got2 := testCombined() + want1 := uint64(0xaaaaaaaaaaaaaaab) + want2 := uint64(0x0ff019940ff00ff0) + if got1 != want1 { + t.Errorf("First result, got %x want %x", got1, want1) + } + if got2 != want2 { + t.Errorf("Second result, got %x want %x", got2, want2) + } +} diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s index e3fda57775..65d80d1380 100644 --- a/src/cmd/internal/obj/arm64/asm_arm64_test.s +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s @@ -37,3 +37,11 @@ TEXT ·testmovk(SB), NOSPLIT, $0-8 MOVK $(40000<<48), R0 MOVD R0, ret+0(FP) RET + +// testCombined() (uint64, uint64) +TEXT ·testCombined(SB), NOSPLIT, $0-16 + MOVD $0xaaaaaaaaaaaaaaab, R0 + MOVD $0x0ff019940ff00ff0, R1 + MOVD R0, a+0(FP) + MOVD R1, b+8(FP) + RET