]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/arm64: factor out splitting of 24 bit unsigned scaled immediates
authorJoel Sing <joel@sing.id.au>
Fri, 28 Apr 2023 20:07:42 +0000 (06:07 +1000)
committerJoel Sing <joel@sing.id.au>
Wed, 26 Jul 2023 12:39:29 +0000 (12:39 +0000)
Rather than duplicating this code, factor it out into a function and
add test coverage.

Change-Id: I37ce568ded4659d98a4ff1361520c5fb2207e947
Reviewed-on: https://go-review.googlesource.com/c/go/+/512537
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/arm64.s
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/obj/arm64/asm_arm64_test.go

index 2c7d638319b554cdfeabe4bf5cc84d78784fa3ed..54f4de76d88242d736ea42f61ee0ce0f80d36c1e 100644 (file)
@@ -558,36 +558,60 @@ TEXT      foo(SB), DUPOK|NOSPLIT, $-8
        FMOVQ   64(RSP), F11    // eb13c03d
 
 // large aligned offset, use two instructions(add+ldr/store).
-       MOVB    R1, 0x1001(R2) // MOVB  R1, 4097(R2)  // 5b04409161070039
-       MOVH    R1, 0x2002(R2) // MOVH  R1, 8194(R2)  // 5b08409161070079
-       MOVW    R1, 0x4004(R2) // MOVW  R1, 16388(R2) // 5b104091610700b9
-       MOVD    R1, 0x8008(R2) // MOVD  R1, 32776(R2) // 5b204091610700f9
-       FMOVS   F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
-       FMOVD   F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
+       MOVB    R1, 0x1001(R2)          // MOVB         R1, 4097(R2)            // 5b04409161070039
+       MOVB    R1, 0xffffff(R2)        // MOVB         R1, 16777215(R2)        // 5bfc7f9161ff3f39
+       MOVH    R1, 0x2002(R2)          // MOVH         R1, 8194(R2)            // 5b08409161070079
+       MOVH    R1, 0xfffffe(R2)        // MOVH         R1, 16777214(R2)        // 5bf87f9161ff3f79
+       MOVW    R1, 0x4004(R2)          // MOVW         R1, 16388(R2)           // 5b104091610700b9
+       MOVW    R1, 0xfffffc(R2)        // MOVW         R1, 16777212(R2)        // 5bf07f9161ff3fb9
+       MOVD    R1, 0x8008(R2)          // MOVD         R1, 32776(R2)           // 5b204091610700f9
+       MOVD    R1, 0xfffff8(R2)        // MOVD         R1, 16777208(R2)        // 5be07f9161ff3ff9
+       FMOVS   F1, 0x4004(R2)          // FMOVS        F1, 16388(R2)           // 5b104091610700bd
+       FMOVS   F1, 0xfffffc(R2)        // FMOVS        F1, 16777212(R2)        // 5bf07f9161ff3fbd
+       FMOVD   F1, 0x8008(R2)          // FMOVD        F1, 32776(R2)           // 5b204091610700fd
+       FMOVD   F1, 0xfffff8(R2)        // FMOVD        F1, 16777208(R2)        // 5be07f9161ff3ffd
 
-       MOVB    0x1001(R1), R2 // MOVB  4097(R1), R2  // 3b04409162078039
-       MOVH    0x2002(R1), R2 // MOVH  8194(R1), R2  // 3b08409162078079
-       MOVW    0x4004(R1), R2 // MOVW  16388(R1), R2 // 3b104091620780b9
-       MOVD    0x8008(R1), R2 // MOVD  32776(R1), R2 // 3b204091620740f9
-       FMOVS   0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
-       FMOVD   0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
+       MOVB    0x1001(R1), R2          // MOVB         4097(R1), R2            // 3b04409162078039
+       MOVB    0xffffff(R1), R2        // MOVB         16777215(R1), R2        // 3bfc7f9162ffbf39
+       MOVH    0x2002(R1), R2          // MOVH         8194(R1), R2            // 3b08409162078079
+       MOVH    0xfffffe(R1), R2        // MOVH         16777214(R1), R2        // 3bf87f9162ffbf79
+       MOVW    0x4004(R1), R2          // MOVW         16388(R1), R2           // 3b104091620780b9
+       MOVW    0xfffffc(R1), R2        // MOVW         16777212(R1), R2        // 3bf07f9162ffbfb9
+       MOVD    0x8008(R1), R2          // MOVD         32776(R1), R2           // 3b204091620740f9
+       MOVD    0xfffff8(R1), R2        // MOVD         16777208(R1), R2        // 3be07f9162ff7ff9
+       FMOVS   0x4004(R1), F2          // FMOVS        16388(R1), F2           // 3b104091620740bd
+       FMOVS   0xfffffc(R1), F2        // FMOVS        16777212(R1), F2        // 3bf07f9162ff7fbd
+       FMOVD   0x8008(R1), F2          // FMOVD        32776(R1), F2           // 3b204091620740fd
+       FMOVD   0xfffff8(R1), F2        // FMOVD        16777208(R1), F2        // 3be07f9162ff7ffd
 
 // very large or unaligned offset uses constant pool.
 // the encoding cannot be checked as the address of the constant pool is unknown.
 // here we only test that they can be assembled.
-       MOVB    R1, 0x44332211(R2) // MOVB      R1, 1144201745(R2)
-       MOVH    R1, 0x44332211(R2) // MOVH      R1, 1144201745(R2)
-       MOVW    R1, 0x44332211(R2) // MOVW      R1, 1144201745(R2)
-       MOVD    R1, 0x44332211(R2) // MOVD      R1, 1144201745(R2)
-       FMOVS   F1, 0x44332211(R2) // FMOVS     F1, 1144201745(R2)
-       FMOVD   F1, 0x44332211(R2) // FMOVD     F1, 1144201745(R2)
+       MOVB    R1, 0x1000000(R2)       // MOVB         R1, 16777216(R2)
+       MOVB    R1, 0x44332211(R2)      // MOVB         R1, 1144201745(R2)
+       MOVH    R1, 0x1000000(R2)       // MOVH         R1, 16777216(R2)
+       MOVH    R1, 0x44332211(R2)      // MOVH         R1, 1144201745(R2)
+       MOVW    R1, 0x1000000(R2)       // MOVW         R1, 16777216(R2)
+       MOVW    R1, 0x44332211(R2)      // MOVW         R1, 1144201745(R2)
+       MOVD    R1, 0x1000000(R2)       // MOVD         R1, 16777216(R2)
+       MOVD    R1, 0x44332211(R2)      // MOVD         R1, 1144201745(R2)
+       FMOVS   F1, 0x1000000(R2)       // FMOVS        F1, 16777216(R2)
+       FMOVS   F1, 0x44332211(R2)      // FMOVS        F1, 1144201745(R2)
+       FMOVD   F1, 0x1000000(R2)       // FMOVD        F1, 16777216(R2)
+       FMOVD   F1, 0x44332211(R2)      // FMOVD        F1, 1144201745(R2)
 
-       MOVB    0x44332211(R1), R2 // MOVB      1144201745(R1), R2
-       MOVH    0x44332211(R1), R2 // MOVH      1144201745(R1), R2
-       MOVW    0x44332211(R1), R2 // MOVW      1144201745(R1), R2
-       MOVD    0x44332211(R1), R2 // MOVD      1144201745(R1), R2
-       FMOVS   0x44332211(R1), F2 // FMOVS     1144201745(R1), F2
-       FMOVD   0x44332211(R1), F2 // FMOVD     1144201745(R1), F2
+       MOVB    0x1000000(R1), R2       // MOVB         16777216(R1), R2
+       MOVB    0x44332211(R1), R2      // MOVB         1144201745(R1), R2
+       MOVH    0x1000000(R1), R2       // MOVH         16777216(R1), R2
+       MOVH    0x44332211(R1), R2      // MOVH         1144201745(R1), R2
+       MOVW    0x1000000(R1), R2       // MOVW         16777216(R1), R2
+       MOVW    0x44332211(R1), R2      // MOVW         1144201745(R1), R2
+       MOVD    0x1000000(R1), R2       // MOVD         16777216(R1), R2
+       MOVD    0x44332211(R1), R2      // MOVD         1144201745(R1), R2
+       FMOVS   0x1000000(R1), F2       // FMOVS        16777216(R1), F2
+       FMOVS   0x44332211(R1), F2      // FMOVS        1144201745(R1), F2
+       FMOVD   0x1000000(R1), F2       // FMOVD        16777216(R1), F2
+       FMOVD   0x44332211(R1), F2      // FMOVD        1144201745(R1), F2
 
 // shifted or extended register offset.
        MOVD    (R2)(R6.SXTW), R4               // 44c866f8
index d5f3f20410c6cdc4581057de35a0647011a45845..88c46a80c50956f4da52279f41f5473e00729456 100644 (file)
@@ -1384,6 +1384,25 @@ func roundUp(x, to uint32) uint32 {
        return (x + to - 1) &^ (to - 1)
 }
 
+// splitImm24uScaled splits an immediate into a scaled 12 bit unsigned lo value
+// and an unscaled shifted 12 bit unsigned hi value. These are typically used
+// by adding or subtracting the hi value and using the lo value as the offset
+// for a load or store.
+func splitImm24uScaled(v int32, shift int) (int32, int32, error) {
+       if v < 0 {
+               return 0, 0, fmt.Errorf("%d is not a 24 bit unsigned immediate", v)
+       }
+       if v&((1<<shift)-1) != 0 {
+               return 0, 0, fmt.Errorf("%d is not a multiple of %d", v, 1<<shift)
+       }
+       lo := (v >> shift) & 0xfff
+       hi := v - (lo << shift)
+       if hi&^0xfff000 != 0 {
+               return 0, 0, fmt.Errorf("%d is too large for a scaled 24 bit unsigned immediate %x %x", v, lo, hi)
+       }
+       return hi, lo, nil
+}
+
 func (c *ctxt7) regoff(a *obj.Addr) int32 {
        c.instoffset = 0
        c.aclass(a)
@@ -3908,23 +3927,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                }
 
                v := c.regoff(&p.To)
-               var hi int32
-               if v < 0 || (v&((1<<uint(s))-1)) != 0 {
-                       // negative or unaligned offset, use constant pool
-                       goto storeusepool
-               }
-
-               hi = v - (v & (0xFFF << uint(s)))
-               if hi&0xFFF != 0 {
-                       c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p)
-               }
-               if hi&^0xFFF000 != 0 {
-                       // hi doesn't fit into an ADD instruction
+               hi, lo, err := splitImm24uScaled(v, s)
+               if err != nil {
                        goto storeusepool
                }
-
                o1 = c.oaddi(p, AADD, hi, REGTMP, r)
-               o2 = c.olsr12u(p, c.opstr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.From.Reg)
+               o2 = c.olsr12u(p, c.opstr(p, p.As), lo, REGTMP, p.From.Reg)
                break
 
        storeusepool:
@@ -3952,23 +3960,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                }
 
                v := c.regoff(&p.From)
-               var hi int32
-               if v < 0 || (v&((1<<uint(s))-1)) != 0 {
-                       // negative or unaligned offset, use constant pool
-                       goto loadusepool
-               }
-
-               hi = v - (v & (0xFFF << uint(s)))
-               if (hi & 0xFFF) != 0 {
-                       c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p)
-               }
-               if hi&^0xFFF000 != 0 {
-                       // hi doesn't fit into an ADD instruction
+               hi, lo, err := splitImm24uScaled(v, s)
+               if err != nil {
                        goto loadusepool
                }
-
                o1 = c.oaddi(p, AADD, hi, REGTMP, r)
-               o2 = c.olsr12u(p, c.opldr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.To.Reg)
+               o2 = c.olsr12u(p, c.opldr(p, p.As), lo, REGTMP, p.To.Reg)
                break
 
        loadusepool:
index c52717dc19838a4ac3ed8b60c6c8e89677da9ccf..d13946f7ebfbbfb2487184436ef8638e22f86672 100644 (file)
@@ -14,6 +14,164 @@ import (
        "testing"
 )
 
+func TestSplitImm24uScaled(t *testing.T) {
+       tests := []struct {
+               v       int32
+               shift   int
+               wantErr bool
+               wantHi  int32
+               wantLo  int32
+       }{
+               {
+                       v:      0,
+                       shift:  0,
+                       wantHi: 0,
+                       wantLo: 0,
+               },
+               {
+                       v:      0x1001,
+                       shift:  0,
+                       wantHi: 0x1000,
+                       wantLo: 0x1,
+               },
+               {
+                       v:      0xffffff,
+                       shift:  0,
+                       wantHi: 0xfff000,
+                       wantLo: 0xfff,
+               },
+               {
+                       v:       0xffffff,
+                       shift:   1,
+                       wantErr: true,
+               },
+               {
+                       v:      0xfe,
+                       shift:  1,
+                       wantHi: 0x0,
+                       wantLo: 0x7f,
+               },
+               {
+                       v:      0x10fe,
+                       shift:  1,
+                       wantHi: 0x0,
+                       wantLo: 0x87f,
+               },
+               {
+                       v:      0x2002,
+                       shift:  1,
+                       wantHi: 0x2000,
+                       wantLo: 0x1,
+               },
+               {
+                       v:      0xfffffe,
+                       shift:  1,
+                       wantHi: 0xffe000,
+                       wantLo: 0xfff,
+               },
+               {
+                       // TODO(jsing): Fix splitting to make this fit.
+                       v:       0x1000ffe,
+                       shift:   1,
+                       wantErr: true,
+                       wantHi:  0xfff000,
+                       wantLo:  0xfff,
+               },
+               {
+                       v:       0x1001000,
+                       shift:   1,
+                       wantErr: true,
+               },
+               {
+                       v:       0xfffffe,
+                       shift:   2,
+                       wantErr: true,
+               },
+               {
+                       v:      0x4004,
+                       shift:  2,
+                       wantHi: 0x4000,
+                       wantLo: 0x1,
+               },
+               {
+                       v:      0xfffffc,
+                       shift:  2,
+                       wantHi: 0xffc000,
+                       wantLo: 0xfff,
+               },
+               {
+                       // TODO(jsing): Fix splitting to make this fit.
+                       v:       0x1002ffc,
+                       shift:   2,
+                       wantErr: true,
+                       wantHi:  0xfff000,
+                       wantLo:  0xfff,
+               },
+               {
+                       v:       0x1003000,
+                       shift:   2,
+                       wantErr: true,
+               },
+               {
+                       v:       0xfffffe,
+                       shift:   3,
+                       wantErr: true,
+               },
+               {
+                       v:      0x8008,
+                       shift:  3,
+                       wantHi: 0x8000,
+                       wantLo: 0x1,
+               },
+               {
+                       v:      0xfffff8,
+                       shift:  3,
+                       wantHi: 0xff8000,
+                       wantLo: 0xfff,
+               },
+               {
+                       // TODO(jsing): Fix splitting to make this fit.
+                       v:       0x1006ff8,
+                       shift:   3,
+                       wantErr: true,
+                       wantHi:  0xfff000,
+                       wantLo:  0xfff,
+               },
+               {
+                       v:       0x1007000,
+                       shift:   3,
+                       wantErr: true,
+               },
+       }
+       for _, test := range tests {
+               hi, lo, err := splitImm24uScaled(test.v, test.shift)
+               switch {
+               case err == nil && test.wantErr:
+                       t.Errorf("splitImm24uScaled(%v, %v) succeeded, want error", test.v, test.shift)
+               case err != nil && !test.wantErr:
+                       t.Errorf("splitImm24uScaled(%v, %v) failed: %v", test.v, test.shift, err)
+               case !test.wantErr:
+                       if got, want := hi, test.wantHi; got != want {
+                               t.Errorf("splitImm24uScaled(%x, %x) - got hi %x, want %x", test.v, test.shift, got, want)
+                       }
+                       if got, want := lo, test.wantLo; got != want {
+                               t.Errorf("splitImm24uScaled(%x, %x) - got lo %x, want %x", test.v, test.shift, got, want)
+                       }
+               }
+       }
+       for shift := 0; shift <= 3; shift++ {
+               for v := int32(0); v < 0xfff000|0xfff<<shift; v = v + 1<<shift {
+                       hi, lo, err := splitImm24uScaled(v, shift)
+                       if err != nil {
+                               t.Fatalf("splitImm24uScaled(%x, %x) failed: %v", v, shift, err)
+                       }
+                       if hi+lo<<shift != v {
+                               t.Fatalf("splitImm24uScaled(%x, %x) = (%x, %x) is incorrect", v, shift, hi, lo)
+                       }
+               }
+       }
+}
+
 // TestLarge generates a very large file to verify that large
 // program builds successfully, in particular, too-far
 // conditional branches are fixed, and also verify that the