Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/arm64: shorten constant integer loads
author: Keith Randall <khr@golang.org>
Tue, 4 Nov 2025 23:15:16 +0000 (15:15 -0800)
committer: Gopher Robot <gobot@golang.org>
Mon, 10 Nov 2025 17:34:13 +0000 (09:34 -0800)
Large integer constants can take up to 4 instructions to encode.

We can encode some large constants with a single instruction, namely
those which are bit patterns (repetitions of certain runs of 0s and 1s).

Often the constants we want to encode are *close* to those bit patterns,
but don't exactly match. For those, we can use 2 instructions, one to
load the close-by bit pattern and one to fix up any mismatches.

The constants we use to strength reduce divides often fit this pattern.
For unsigned divides by 1 through 15, this CL applies to the constants
for N = 3, 5, 6, 10, 12, and 15.

Triggers 17 times in hello world.

Change-Id: I623abf32961fb3e74d0a163f6822f0647cd94499
Reviewed-on: https://go-review.googlesource.com/c/go/+/717900
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/arm64.s
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/obj/arm64/asm_arm64_test.go
src/cmd/internal/obj/arm64/asm_arm64_test.s

index ae10f347bba101ca0f96db0f239955cfab6a403f..773380e9bb67a5bd744f9d63befca3d82f4ff819 100644 (file)
@@ -400,6 +400,8 @@ TEXT        foo(SB), DUPOK|NOSPLIT, $-8
        MOVD    $0x11110000, R1               // MOVD   $286326784, R1              // 2122a2d2
        MOVD    $0xaaaa0000aaaa1111, R1       // MOVD   $-6149102338357718767, R1   // 212282d24155b5f24155f5f2
        MOVD    $0x1111ffff1111aaaa, R1       // MOVD   $1230045644216969898, R1    // a1aa8a922122a2f22122e2f2
+       MOVD    $0xaaaaaaaaaaaaaaab, R1       // MOVD   $-6148914691236517205, R1   // e1f301b2615595f2
+       MOVD    $0x0ff019940ff00ff0, R1       // MOVD   $1148446028692721648, R1    // e19f0cb28132c3f2
        MOVD    $0, R1                        // e1031faa
        MOVD    $-1, R1                       // 01008092
        MOVD    $0x210000, R0                 // MOVD   $2162688, R0                // 2004a0d2
index 7e7f028bfb3d2b6abb251fa91e6cc813c453d808..ccf8eda495a83c30456bbd2cd80eff7eb1730777 100644 (file)
@@ -38,6 +38,7 @@ import (
        "fmt"
        "log"
        "math"
+       "math/bits"
        "slices"
        "strings"
 )
@@ -1976,7 +1977,18 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
                return C_MOVCON
        } else if zeroCount == 2 || negCount == 2 {
                return C_MOVCON2
-       } else if zeroCount == 1 || negCount == 1 {
+       }
+       // See omovlconst for description of this loop.
+       for i := 0; i < 4; i++ {
+               mask := uint64(0xffff) << (i * 16)
+               for period := 2; period <= 32; period *= 2 {
+                       x := uint64(a.Offset)&^mask | bits.RotateLeft64(uint64(a.Offset), max(period, 16))&mask
+                       if isbitcon(x) {
+                               return C_MOVCON2
+                       }
+               }
+       }
+       if zeroCount == 1 || negCount == 1 {
                return C_MOVCON3
        } else {
                return C_VCON
@@ -7555,6 +7567,31 @@ func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uin
                                }
                        }
                        return 2
+               }
+
+               // Look for a two instruction pair, a bit pattern encodeable
+               // as a bitcon immediate plus a fixup MOVK instruction.
+               // Constants like this often occur from strength reduction of divides.
+               for i = 0; i < 4; i++ {
+                       mask := uint64(0xffff) << (i * 16)
+                       for period := 2; period <= 32; period *= 2 { // TODO: handle period==64 somehow?
+                               // Copy in bits from outside of the masked region
+                               x := uint64(d)&^mask | bits.RotateLeft64(uint64(d), max(period, 16))&mask
+                               if isbitcon(x) {
+                                       // ORR $c1, ZR, rt
+                                       os[0] = c.opirr(p, AORR)
+                                       os[0] |= bitconEncode(x, 64) | uint32(REGZERO&31)<<5 | uint32(rt&31)
+                                       // MOVK $c2<<(i*16), rt
+                                       os[1] = c.opirr(p, AMOVK)
+                                       os[1] |= MOVCONST(d, i, rt)
+                                       return 2
+                               }
+                       }
+               }
+               // TODO: other fixups, like ADD or SUB?
+               // TODO: 3-instruction variant, instead of the full MOVD+3*MOVK version below?
+
+               switch {
 
                case zeroCount == 1:
                        // one MOVZ and two MOVKs
index 83d137a08466bd2f66b0dd751b93bf5d7bc2f616..b83db60b40f118abe35999d1729e19cb86cffd81 100644 (file)
@@ -38,3 +38,16 @@ func TestMOVK(t *testing.T) {
                t.Errorf("Got %x want %x\n", x, want)
        }
 }
+
+func testCombined() (a uint64, b uint64)
+func TestCombined(t *testing.T) {
+       got1, got2 := testCombined()
+       want1 := uint64(0xaaaaaaaaaaaaaaab)
+       want2 := uint64(0x0ff019940ff00ff0)
+       if got1 != want1 {
+               t.Errorf("First result, got %x want %x", got1, want1)
+       }
+       if got2 != want2 {
+               t.Errorf("First result, got %x want %x", got2, want2)
+       }
+}
index e3fda57775f3af3032d411aac4ff896505872f27..65d80d1380f1ccba82ee67375472c8eab06e9487 100644 (file)
@@ -37,3 +37,11 @@ TEXT ·testmovk(SB), NOSPLIT, $0-8
        MOVK    $(40000<<48), R0
        MOVD    R0, ret+0(FP)
        RET
+
+// testCombined() (uint64, uint64) loads two 64-bit immediates with MOVD and returns them.
+TEXT ·testCombined(SB), NOSPLIT, $0-16
+       MOVD    $0xaaaaaaaaaaaaaaab, R0 // 0xaaaa in every 16-bit chunk except the lowest (0xaaab)
+       MOVD    $0x0ff019940ff00ff0, R1 // 0x0ff0 in every 16-bit chunk except bits 32-47 (0x1994)
+       MOVD    R0, a+0(FP)             // first result
+       MOVD    R1, b+8(FP)             // second result
+       RET