]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm: add PCALIGN support on 386/amd64
authorMauri de Souza Meneguzzo <mauri870@gmail.com>
Fri, 28 Jul 2023 19:10:04 +0000 (19:10 +0000)
committerGopher Robot <gobot@golang.org>
Fri, 28 Jul 2023 19:39:51 +0000 (19:39 +0000)
The PCALIGN asm directive was not supported on 386/amd64,
causing a compile-time error when used. The same directive
is currently supported on arm64, loong64 and ppc64 architectures.

This has potential for noticeable performance improvements on
amd64 across multiple packages, I did a quick test aligning a hot
loop on bytes.IndexByte:

```
IndexByte/10-16                 3.477n ± ∞ ¹   3.462n ± ∞ ¹        ~ (p=0.198 n=5)
IndexByte/32-16                 4.675n ± ∞ ¹   4.834n ± ∞ ¹   +3.40% (p=0.008 n=5)
IndexByte/4K-16                 67.47n ± ∞ ¹   44.44n ± ∞ ¹  -34.13% (p=0.008 n=5)
IndexByte/4M-16                 61.98µ ± ∞ ¹   45.07µ ± ∞ ¹  -27.28% (p=0.008 n=5)
IndexByte/64M-16               1206.6µ ± ∞ ¹   940.9µ ± ∞ ¹  -22.02% (p=0.008 n=5)
IndexBytePortable/10-16         4.064n ± ∞ ¹   4.044n ± ∞ ¹        ~ (p=0.325 n=5)
IndexBytePortable/32-16         9.999n ± ∞ ¹   9.934n ± ∞ ¹        ~ (p=0.151 n=5)
IndexBytePortable/4K-16         975.8n ± ∞ ¹   965.5n ± ∞ ¹        ~ (p=0.151 n=5)
IndexBytePortable/4M-16         973.3µ ± ∞ ¹   972.3µ ± ∞ ¹        ~ (p=0.222 n=5)
IndexBytePortable/64M-16        15.68m ± ∞ ¹   15.89m ± ∞ ¹        ~ (p=0.310 n=5)
geomean                         1.478µ         1.342µ         -9.20%

IndexByte/10-16                2.678Gi ± ∞ ¹   2.690Gi ± ∞ ¹        ~ (p=0.151 n=5)
IndexByte/32-16                6.375Gi ± ∞ ¹   6.165Gi ± ∞ ¹   -3.30% (p=0.008 n=5)
IndexByte/4K-16                56.54Gi ± ∞ ¹   85.85Gi ± ∞ ¹  +51.83% (p=0.008 n=5)
IndexByte/4M-16                63.03Gi ± ∞ ¹   86.68Gi ± ∞ ¹  +37.52% (p=0.008 n=5)
IndexByte/64M-16               51.80Gi ± ∞ ¹   66.42Gi ± ∞ ¹  +28.23% (p=0.008 n=5)
IndexBytePortable/10-16        2.291Gi ± ∞ ¹   2.303Gi ± ∞ ¹        ~ (p=0.421 n=5)
IndexBytePortable/32-16        2.980Gi ± ∞ ¹   3.000Gi ± ∞ ¹        ~ (p=0.151 n=5)
IndexBytePortable/4K-16        3.909Gi ± ∞ ¹   3.951Gi ± ∞ ¹        ~ (p=0.151 n=5)
IndexBytePortable/4M-16        4.013Gi ± ∞ ¹   4.017Gi ± ∞ ¹        ~ (p=0.222 n=5)
IndexBytePortable/64M-16       3.987Gi ± ∞ ¹   3.933Gi ± ∞ ¹        ~ (p=0.310 n=5)
geomean                        8.183Gi         9.013Gi        +10.14%
```

Fixes #56474

Change-Id: Idea022b1a16e6d4b8dd778723adb862c46602c4f
GitHub-Last-Rev: 2eb7e31dc378a02fd83faa7d41239df0f2859677
GitHub-Pull-Request: golang/go#61516
Reviewed-on: https://go-review.googlesource.com/c/go/+/511662
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>

src/cmd/internal/obj/x86/asm6.go
src/cmd/internal/obj/x86/asm_test.go

index 782b6d4aff1051cc16622488113a8f95ed0b6ff4..5e988eaf48a2200161a14e107e32179482516a1e 100644 (file)
@@ -2036,6 +2036,31 @@ type nopPad struct {
        n int32     // Size of the pad
 }
 
+// Padding bytes to add to align code as requested.
+// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+//
+// pc: current offset in function, in bytes
+// a: requested alignment, in bytes
+// cursym: current function being assembled
+// returns number of bytes of padding needed
+func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
+       if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
+               ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
+               return 0
+       }
+
+       // By default function alignment is 32 bytes for amd64
+       if cursym.Func().Align < int32(a) {
+               cursym.Func().Align = int32(a)
+       }
+
+       if pc&(a-1) != 0 {
+               return int(a - (pc & (a - 1)))
+       }
+
+       return 0
+}
+
 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
        if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
                ctxt.Diag("-spectre=ret not supported on 386")
@@ -2119,6 +2144,19 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
                        c0 := c
                        c = pjc.padJump(ctxt, s, p, c)
 
+                       if p.As == obj.APCALIGN {
+                               aln := p.From.Offset
+                               v := addpad(int64(c), aln, ctxt, s)
+                               if v > 0 {
+                                       s.Grow(int64(c) + int64(v))
+                                       fillnop(s.P[c:], int(v))
+                               }
+
+                               c += int32(v)
+                               pPrev = p
+                               continue
+                       }
+
                        if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
                                // pad with NOPs
                                v := -c & (loopAlign - 1)
index 36c8fce675b7dabe23c9e471d9b6760be6e2f67d..458a91258a35b002b946da61d9709286e735a549 100644 (file)
@@ -7,6 +7,10 @@ package x86
 import (
        "cmd/internal/obj"
        "cmd/internal/objabi"
+       "internal/testenv"
+       "os"
+       "path/filepath"
+       "regexp"
        "testing"
 )
 
@@ -289,3 +293,50 @@ func TestRegIndex(t *testing.T) {
                }
        }
 }
+
+// TestPCALIGN verifies the correctness of the PCALIGN by checking if the
+// code can be aligned to the alignment value.
+func TestPCALIGN(t *testing.T) {
+       testenv.MustHaveGoBuild(t)
+       dir := t.TempDir()
+       tmpfile := filepath.Join(dir, "test.s")
+       tmpout := filepath.Join(dir, "test.o")
+
+       var testCases = []struct {
+               name string
+               code string
+               out  string
+       }{
+               {
+                       name: "8-byte alignment",
+                       code: "TEXT ·foo(SB),$0-0\nMOVQ $0, AX\nPCALIGN $8\nMOVQ $1, BX\nRET\n",
+                       out:  `0x0008\s00008\s\(.*\)\tMOVQ\t\$1,\sBX`,
+               },
+               {
+                       name: "16-byte alignment",
+                       code: "TEXT ·foo(SB),$0-0\nMOVQ $0, AX\nPCALIGN $16\nMOVQ $2, CX\nRET\n",
+                       out:  `0x0010\s00016\s\(.*\)\tMOVQ\t\$2,\sCX`,
+               },
+       }
+
+       for _, test := range testCases {
+               if err := os.WriteFile(tmpfile, []byte(test.code), 0644); err != nil {
+                       t.Fatal(err)
+               }
+               cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile)
+               cmd.Env = append(os.Environ(), "GOARCH=amd64", "GOOS=linux")
+               out, err := cmd.CombinedOutput()
+               if err != nil {
+                       t.Errorf("The %s build failed: %v, output: %s", test.name, err, out)
+                       continue
+               }
+
+               matched, err := regexp.MatchString(test.out, string(out))
+               if err != nil {
+                       t.Fatal(err)
+               }
+               if !matched {
+                       t.Errorf("The %s testing failed!\ninput: %s\noutput: %s\n", test.name, test.code, out)
+               }
+       }
+}