]> Cypherpunks repositories - gostls13.git/commitdiff
build: support frame-pointer for arm64
authorZheng Xu <zheng.xu@arm.com>
Wed, 29 Aug 2018 06:55:03 +0000 (14:55 +0800)
committerCherry Zhang <cherryyz@google.com>
Wed, 29 Aug 2018 18:28:34 +0000 (18:28 +0000)
Supporting frame-pointer makes Linux's perf and other profilers much more useful
because it lets them gather a stack trace efficiently on profiling events. Major
changes include:
1. save FP on the word below where RSP is pointing to (proposed by Cherry and Austin)
2. adjust some specific offsets in runtime assembly and wrapper code
3. add support to FP in goroutine scheduler
4. adjust link stack overflow check to take the extra word into account
5. adjust nosplit test cases to enable frame sizes which are 16 bytes aligned

Performance impacts on go1 benchmarks:

Enable frame-pointer (by default)

name                      old time/op    new time/op    delta
BinaryTree17-46              5.94s ± 0%     6.00s ± 0%  +1.03%  (p=0.029 n=4+4)
Fannkuch11-46                2.84s ± 1%     2.77s ± 0%  -2.58%  (p=0.008 n=5+5)
FmtFprintfEmpty-46          55.0ns ± 1%    58.9ns ± 1%  +7.06%  (p=0.008 n=5+5)
FmtFprintfString-46          102ns ± 0%     105ns ± 0%  +2.94%  (p=0.008 n=5+5)
FmtFprintfInt-46             118ns ± 0%     117ns ± 1%  -1.19%  (p=0.000 n=4+5)
FmtFprintfIntInt-46          181ns ± 0%     182ns ± 1%    ~     (p=0.444 n=5+5)
FmtFprintfPrefixedInt-46     215ns ± 1%     214ns ± 0%    ~     (p=0.254 n=5+4)
FmtFprintfFloat-46           292ns ± 0%     296ns ± 0%  +1.46%  (p=0.029 n=4+4)
FmtManyArgs-46               720ns ± 0%     732ns ± 0%  +1.72%  (p=0.008 n=5+5)
GobDecode-46                9.82ms ± 1%   10.03ms ± 2%  +2.10%  (p=0.008 n=5+5)
GobEncode-46                8.14ms ± 0%    8.72ms ± 1%  +7.14%  (p=0.008 n=5+5)
Gzip-46                      420ms ± 0%     424ms ± 0%  +0.92%  (p=0.008 n=5+5)
Gunzip-46                   48.2ms ± 0%    48.4ms ± 0%  +0.41%  (p=0.008 n=5+5)
HTTPClientServer-46          201µs ± 4%     201µs ± 0%    ~     (p=0.730 n=5+4)
JSONEncode-46               17.1ms ± 0%    17.7ms ± 1%  +3.80%  (p=0.008 n=5+5)
JSONDecode-46               88.0ms ± 0%    90.1ms ± 0%  +2.42%  (p=0.008 n=5+5)
Mandelbrot200-46            5.06ms ± 0%    5.07ms ± 0%    ~     (p=0.310 n=5+5)
GoParse-46                  5.04ms ± 0%    5.12ms ± 0%  +1.53%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46       117ns ± 0%     117ns ± 0%    ~     (all equal)
RegexpMatchEasy0_1K-46       332ns ± 0%     329ns ± 0%  -0.78%  (p=0.008 n=5+5)
RegexpMatchEasy1_32-46       104ns ± 0%     113ns ± 0%  +8.65%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46       563ns ± 0%     569ns ± 0%  +1.10%  (p=0.008 n=5+5)
RegexpMatchMedium_32-46      167ns ± 2%     177ns ± 1%  +5.74%  (p=0.008 n=5+5)
RegexpMatchMedium_1K-46     49.5µs ± 0%    53.4µs ± 0%  +7.81%  (p=0.008 n=5+5)
RegexpMatchHard_32-46       2.56µs ± 1%    2.72µs ± 0%  +6.01%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46       77.0µs ± 0%    81.8µs ± 0%  +6.24%  (p=0.016 n=5+4)
Revcomp-46                   631ms ± 1%     627ms ± 1%    ~     (p=0.095 n=5+5)
Template-46                 81.8ms ± 0%    86.3ms ± 0%  +5.55%  (p=0.008 n=5+5)
TimeParse-46                 423ns ± 0%     432ns ± 0%  +2.32%  (p=0.008 n=5+5)
TimeFormat-46                478ns ± 2%     497ns ± 1%  +3.89%  (p=0.008 n=5+5)
[Geo mean]                  71.6µs         73.3µs       +2.45%

name                      old speed      new speed      delta
GobDecode-46              78.1MB/s ± 1%  76.6MB/s ± 2%  -2.04%  (p=0.008 n=5+5)
GobEncode-46              94.3MB/s ± 0%  88.0MB/s ± 1%  -6.67%  (p=0.008 n=5+5)
Gzip-46                   46.2MB/s ± 0%  45.8MB/s ± 0%  -0.91%  (p=0.008 n=5+5)
Gunzip-46                  403MB/s ± 0%   401MB/s ± 0%  -0.41%  (p=0.008 n=5+5)
JSONEncode-46              114MB/s ± 0%   109MB/s ± 1%  -3.66%  (p=0.008 n=5+5)
JSONDecode-46             22.0MB/s ± 0%  21.5MB/s ± 0%  -2.35%  (p=0.008 n=5+5)
GoParse-46                11.5MB/s ± 0%  11.3MB/s ± 0%  -1.51%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46     272MB/s ± 0%   272MB/s ± 1%    ~     (p=0.190 n=4+5)
RegexpMatchEasy0_1K-46    3.08GB/s ± 0%  3.11GB/s ± 0%  +0.77%  (p=0.008 n=5+5)
RegexpMatchEasy1_32-46     306MB/s ± 0%   283MB/s ± 0%  -7.63%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46    1.82GB/s ± 0%  1.80GB/s ± 0%  -1.07%  (p=0.008 n=5+5)
RegexpMatchMedium_32-46   5.99MB/s ± 0%  5.64MB/s ± 1%  -5.77%  (p=0.016 n=4+5)
RegexpMatchMedium_1K-46   20.7MB/s ± 0%  19.2MB/s ± 0%  -7.25%  (p=0.008 n=5+5)
RegexpMatchHard_32-46     12.5MB/s ± 1%  11.8MB/s ± 0%  -5.66%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46     13.3MB/s ± 0%  12.5MB/s ± 1%  -6.01%  (p=0.008 n=5+5)
Revcomp-46                 402MB/s ± 1%   405MB/s ± 1%    ~     (p=0.095 n=5+5)
Template-46               23.7MB/s ± 0%  22.5MB/s ± 0%  -5.25%  (p=0.008 n=5+5)
[Geo mean]                82.2MB/s       79.6MB/s       -3.26%

Disable frame-pointer (GOEXPERIMENT=noframepointer)

name                      old time/op    new time/op    delta
BinaryTree17-46              5.94s ± 0%     5.96s ± 0%  +0.39%  (p=0.029 n=4+4)
Fannkuch11-46                2.84s ± 1%     2.79s ± 1%  -1.68%  (p=0.008 n=5+5)
FmtFprintfEmpty-46          55.0ns ± 1%    55.2ns ± 3%    ~     (p=0.794 n=5+5)
FmtFprintfString-46          102ns ± 0%     103ns ± 0%  +0.98%  (p=0.016 n=5+4)
FmtFprintfInt-46             118ns ± 0%     115ns ± 0%  -2.54%  (p=0.029 n=4+4)
FmtFprintfIntInt-46          181ns ± 0%     179ns ± 0%  -1.10%  (p=0.000 n=5+4)
FmtFprintfPrefixedInt-46     215ns ± 1%     213ns ± 0%    ~     (p=0.143 n=5+4)
FmtFprintfFloat-46           292ns ± 0%     300ns ± 0%  +2.83%  (p=0.029 n=4+4)
FmtManyArgs-46               720ns ± 0%     739ns ± 0%  +2.64%  (p=0.008 n=5+5)
GobDecode-46                9.82ms ± 1%    9.78ms ± 1%    ~     (p=0.151 n=5+5)
GobEncode-46                8.14ms ± 0%    8.12ms ± 1%    ~     (p=0.690 n=5+5)
Gzip-46                      420ms ± 0%     420ms ± 0%    ~     (p=0.548 n=5+5)
Gunzip-46                   48.2ms ± 0%    48.0ms ± 0%  -0.33%  (p=0.032 n=5+5)
HTTPClientServer-46          201µs ± 4%     199µs ± 3%    ~     (p=0.548 n=5+5)
JSONEncode-46               17.1ms ± 0%    17.2ms ± 0%    ~     (p=0.056 n=5+5)
JSONDecode-46               88.0ms ± 0%    88.6ms ± 0%  +0.64%  (p=0.008 n=5+5)
Mandelbrot200-46            5.06ms ± 0%    5.07ms ± 0%    ~     (p=0.548 n=5+5)
GoParse-46                  5.04ms ± 0%    5.07ms ± 0%  +0.65%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46       117ns ± 0%     112ns ± 4%  -4.27%  (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46       332ns ± 0%     330ns ± 1%    ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-46       104ns ± 0%     110ns ± 1%  +5.29%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46       563ns ± 0%     567ns ± 2%    ~     (p=0.151 n=5+5)
RegexpMatchMedium_32-46      167ns ± 2%     166ns ± 0%    ~     (p=0.333 n=5+4)
RegexpMatchMedium_1K-46     49.5µs ± 0%    49.6µs ± 0%    ~     (p=0.841 n=5+5)
RegexpMatchHard_32-46       2.56µs ± 1%    2.49µs ± 0%  -2.81%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46       77.0µs ± 0%    75.8µs ± 0%  -1.55%  (p=0.008 n=5+5)
Revcomp-46                   631ms ± 1%     628ms ± 0%    ~     (p=0.095 n=5+5)
Template-46                 81.8ms ± 0%    84.3ms ± 1%  +3.05%  (p=0.008 n=5+5)
TimeParse-46                 423ns ± 0%     425ns ± 0%  +0.52%  (p=0.008 n=5+5)
TimeFormat-46                478ns ± 2%     478ns ± 1%    ~     (p=1.000 n=5+5)
[Geo mean]                  71.6µs         71.6µs       -0.01%

name                      old speed      new speed      delta
GobDecode-46              78.1MB/s ± 1%  78.5MB/s ± 1%    ~     (p=0.151 n=5+5)
GobEncode-46              94.3MB/s ± 0%  94.5MB/s ± 1%    ~     (p=0.690 n=5+5)
Gzip-46                   46.2MB/s ± 0%  46.2MB/s ± 0%    ~     (p=0.571 n=5+5)
Gunzip-46                  403MB/s ± 0%   404MB/s ± 0%  +0.33%  (p=0.032 n=5+5)
JSONEncode-46              114MB/s ± 0%   113MB/s ± 0%    ~     (p=0.056 n=5+5)
JSONDecode-46             22.0MB/s ± 0%  21.9MB/s ± 0%  -0.64%  (p=0.008 n=5+5)
GoParse-46                11.5MB/s ± 0%  11.4MB/s ± 0%  -0.64%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46     272MB/s ± 0%   285MB/s ± 4%  +4.74%  (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46    3.08GB/s ± 0%  3.10GB/s ± 1%    ~     (p=0.151 n=5+5)
RegexpMatchEasy1_32-46     306MB/s ± 0%   290MB/s ± 1%  -5.21%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46    1.82GB/s ± 0%  1.81GB/s ± 2%    ~     (p=0.151 n=5+5)
RegexpMatchMedium_32-46   5.99MB/s ± 0%  6.02MB/s ± 1%    ~     (p=0.063 n=4+5)
RegexpMatchMedium_1K-46   20.7MB/s ± 0%  20.7MB/s ± 0%    ~     (p=0.659 n=5+5)
RegexpMatchHard_32-46     12.5MB/s ± 1%  12.8MB/s ± 0%  +2.88%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46     13.3MB/s ± 0%  13.5MB/s ± 0%  +1.58%  (p=0.008 n=5+5)
Revcomp-46                 402MB/s ± 1%   405MB/s ± 0%    ~     (p=0.095 n=5+5)
Template-46               23.7MB/s ± 0%  23.0MB/s ± 1%  -2.95%  (p=0.008 n=5+5)
[Geo mean]                82.2MB/s       82.3MB/s       +0.04%

Frame-pointer is enabled on Linux by default but can be disabled by setting: GOEXPERIMENT=noframepointer.

Fixes #10110

Change-Id: I1bfaca6dba29a63009d7c6ab04ed7a1413d9479e
Reviewed-on: https://go-review.googlesource.com/61511
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

14 files changed:
src/cmd/compile/internal/arm64/ggen.go
src/cmd/compile/internal/gc/pgen.go
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/obj/arm64/obj7.go
src/cmd/internal/objabi/util.go
src/cmd/link/internal/ld/lib.go
src/runtime/asm_arm64.s
src/runtime/cgocall.go
src/runtime/rt0_darwin_arm64.s
src/runtime/rt0_linux_arm64.s
src/runtime/sys_linux_arm64.s
src/runtime/traceback.go
test/codegen/stack.go
test/nosplit.go

index f7b3851398f51c1578cfc62f5664dfdc814b6ced..204391fef1c778bde09fd6598404afd08515e4c9 100644 (file)
@@ -14,10 +14,10 @@ import (
 var darwin = objabi.GOOS == "darwin"
 
 func padframe(frame int64) int64 {
-       // arm64 requires that the frame size (not counting saved LR)
-       // be empty or be 8 mod 16. If not, pad it.
-       if frame != 0 && frame%16 != 8 {
-               frame += 8
+       // arm64 requires that the frame size (not counting saved FP&LR)
+       // be 16 bytes aligned. If not, pad it.
+       if frame%16 != 0 {
+               frame += 16 - (frame % 16)
        }
        return frame
 }
index 7f20643ab57feb717ed7fd539ebfd216d7d7c51e..563eb9e966388b496c9a0a64c64ca99c1a3d9ba5 100644 (file)
@@ -427,7 +427,8 @@ func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool
                        if Ctxt.FixedFrameSize() == 0 {
                                offs -= int64(Widthptr)
                        }
-                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
+                               // There is a word space for FP on ARM64 even if the frame pointer is disabled
                                offs -= int64(Widthptr)
                        }
 
@@ -607,7 +608,8 @@ func stackOffset(slot ssa.LocalSlot) int32 {
                if Ctxt.FixedFrameSize() == 0 {
                        base -= int64(Widthptr)
                }
-               if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+               if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
+                       // There is a word space for FP on ARM64 even if the frame pointer is disabled
                        base -= int64(Widthptr)
                }
        case PPARAM, PPARAMOUT:
index ad4f17254467126c3b438b6b17e9ba264d6547e0..2abb8c2c773d930f51cfe04f92defcef8a4d5897 100644 (file)
@@ -49,6 +49,7 @@ type ctxt7 struct {
        blitrl     *obj.Prog
        elitrl     *obj.Prog
        autosize   int32
+       extrasize  int32
        instoffset int64
        pc         int64
        pool       struct {
@@ -777,7 +778,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
        }
 
-       c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset&0xffffffff) + 8}
+       c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
+       p.To.Offset &= 0xffffffff  // extrasize is no longer needed
 
        bflag := 1
        pc := int64(0)
@@ -1436,7 +1438,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
                                // a.Offset is still relative to pseudo-SP.
                                a.Reg = obj.REG_NONE
                        }
-                       c.instoffset = int64(c.autosize) + a.Offset
+                       // The frame top 8 or 16 bytes are for FP
+                       c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
                        return autoclass(c.instoffset)
 
                case obj.NAME_PARAM:
@@ -1536,7 +1539,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
                                // a.Offset is still relative to pseudo-SP.
                                a.Reg = obj.REG_NONE
                        }
-                       c.instoffset = int64(c.autosize) + a.Offset
+                       // The frame top 8 or 16 bytes are for FP
+                       c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
 
                case obj.NAME_PARAM:
                        if a.Reg == REGSP {
index 0d832387d7f7dd08c1f215ccc6feed8d4d0893ef..97b8f70c9b20e3e332737d2ddec57a9f74273462 100644 (file)
@@ -542,22 +542,28 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                c.autosize += 8
                        }
 
-                       if c.autosize != 0 && c.autosize&(16-1) != 0 {
-                               // The frame includes an LR.
-                               // If the frame size is 8, it's only an LR,
-                               // so there's no potential for breaking references to
-                               // local variables by growing the frame size,
-                               // because there are no local variables.
-                               // But otherwise, if there is a non-empty locals section,
-                               // the author of the code is responsible for making sure
-                               // that the frame size is 8 mod 16.
-                               if c.autosize == 8 {
-                                       c.autosize += 8
-                                       c.cursym.Func.Locals += 8
+                       if c.autosize != 0 {
+                               extrasize := int32(0)
+                               if c.autosize%16 == 8 {
+                                       // Allocate extra 8 bytes on the frame top to save FP
+                                       extrasize = 8
+                               } else if c.autosize&(16-1) == 0 {
+                                       // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
+                                       extrasize = 16
                                } else {
-                                       c.ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, c.autosize-8)
+                                       c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
                                }
+                               c.autosize += extrasize
+                               c.cursym.Func.Locals += extrasize
+
+                               // low 32 bits for autosize
+                               // high 32 bits for extrasize
+                               p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
+                       } else {
+                               // NOFRAME
+                               p.To.Offset = 0
                        }
+
                        if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {
                                if c.ctxt.Debugvlog {
                                        c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name)
@@ -565,9 +571,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                c.cursym.Func.Text.Mark |= LEAF
                        }
 
-                       // FP offsets need an updated p.To.Offset.
-                       p.To.Offset = int64(c.autosize) - 8
-
                        if cursym.Func.Text.Mark&LEAF != 0 {
                                cursym.Set(obj.AttrLeaf, true)
                                if p.From.Sym.NoFrame() {
@@ -631,6 +634,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                q1.Spadj = aoffset
                        }
 
+                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = AMOVD
+                               q1.From.Type = obj.TYPE_REG
+                               q1.From.Reg = REGFP
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Reg = REGSP
+                               q1.To.Offset = -8
+
+                               q1 = obj.Appendp(q1, c.newprog)
+                               q1.Pos = p.Pos
+                               q1.As = ASUB
+                               q1.From.Type = obj.TYPE_CONST
+                               q1.From.Offset = 8
+                               q1.Reg = REGSP
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REGFP
+                       }
+
                        if c.cursym.Func.Text.From.Sym.Wrapper() {
                                // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
                                //
@@ -753,9 +776,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                        p.To.Type = obj.TYPE_REG
                                        p.To.Reg = REGSP
                                        p.Spadj = -c.autosize
+
+                                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                                               p = obj.Appendp(p, c.newprog)
+                                               p.As = ASUB
+                                               p.From.Type = obj.TYPE_CONST
+                                               p.From.Offset = 8
+                                               p.Reg = REGSP
+                                               p.To.Type = obj.TYPE_REG
+                                               p.To.Reg = REGFP
+                                       }
                                }
                        } else {
                                /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
+
+                               if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                                       p.As = AMOVD
+                                       p.From.Type = obj.TYPE_MEM
+                                       p.From.Reg = REGSP
+                                       p.From.Offset = -8
+                                       p.To.Type = obj.TYPE_REG
+                                       p.To.Reg = REGFP
+                                       p = obj.Appendp(p, c.newprog)
+                               }
+
                                aoffset := c.autosize
 
                                if aoffset > 0xF0 {
@@ -814,7 +858,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                        p.Spadj = int32(+p.From.Offset)
                                }
                        }
-                       break
 
                case obj.AGETCALLERPC:
                        if cursym.Leaf() {
@@ -828,6 +871,112 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                p.From.Type = obj.TYPE_MEM
                                p.From.Reg = REGSP
                        }
+
+               case obj.ADUFFCOPY:
+                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                               //  ADR ret_addr, R27
+                               //  STP (FP, R27), -24(SP)
+                               //  SUB 24, SP, FP
+                               //  DUFFCOPY
+                               // ret_addr:
+                               //  SUB 8, SP, FP
+
+                               q1 := p
+                               // copy DUFFCOPY from q1 to q4
+                               q4 := obj.Appendp(p, c.newprog)
+                               q4.Pos = p.Pos
+                               q4.As = obj.ADUFFCOPY
+                               q4.To = p.To
+
+                               q1.As = AADR
+                               q1.From.Type = obj.TYPE_BRANCH
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REG_R27
+
+                               q2 := obj.Appendp(q1, c.newprog)
+                               q2.Pos = p.Pos
+                               q2.As = ASTP
+                               q2.From.Type = obj.TYPE_REGREG
+                               q2.From.Reg = REGFP
+                               q2.From.Offset = int64(REG_R27)
+                               q2.To.Type = obj.TYPE_MEM
+                               q2.To.Reg = REGSP
+                               q2.To.Offset = -24
+
+                               // maintaine FP for DUFFCOPY
+                               q3 := obj.Appendp(q2, c.newprog)
+                               q3.Pos = p.Pos
+                               q3.As = ASUB
+                               q3.From.Type = obj.TYPE_CONST
+                               q3.From.Offset = 24
+                               q3.Reg = REGSP
+                               q3.To.Type = obj.TYPE_REG
+                               q3.To.Reg = REGFP
+
+                               q5 := obj.Appendp(q4, c.newprog)
+                               q5.Pos = p.Pos
+                               q5.As = ASUB
+                               q5.From.Type = obj.TYPE_CONST
+                               q5.From.Offset = 8
+                               q5.Reg = REGSP
+                               q5.To.Type = obj.TYPE_REG
+                               q5.To.Reg = REGFP
+                               q1.Pcond = q5
+                               p = q5
+                       }
+
+               case obj.ADUFFZERO:
+                       if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+                               //  ADR ret_addr, R27
+                               //  STP (FP, R27), -24(SP)
+                               //  SUB 24, SP, FP
+                               //  DUFFZERO
+                               // ret_addr:
+                               //  SUB 8, SP, FP
+
+                               q1 := p
+                               // copy DUFFZERO from q1 to q4
+                               q4 := obj.Appendp(p, c.newprog)
+                               q4.Pos = p.Pos
+                               q4.As = obj.ADUFFZERO
+                               q4.To = p.To
+
+                               q1.As = AADR
+                               q1.From.Type = obj.TYPE_BRANCH
+                               q1.To.Type = obj.TYPE_REG
+                               q1.To.Reg = REG_R27
+
+                               q2 := obj.Appendp(q1, c.newprog)
+                               q2.Pos = p.Pos
+                               q2.As = ASTP
+                               q2.From.Type = obj.TYPE_REGREG
+                               q2.From.Reg = REGFP
+                               q2.From.Offset = int64(REG_R27)
+                               q2.To.Type = obj.TYPE_MEM
+                               q2.To.Reg = REGSP
+                               q2.To.Offset = -24
+
+                               // maintaine FP for DUFFZERO
+                               q3 := obj.Appendp(q2, c.newprog)
+                               q3.Pos = p.Pos
+                               q3.As = ASUB
+                               q3.From.Type = obj.TYPE_CONST
+                               q3.From.Offset = 24
+                               q3.Reg = REGSP
+                               q3.To.Type = obj.TYPE_REG
+                               q3.To.Reg = REGFP
+
+                               q5 := obj.Appendp(q4, c.newprog)
+                               q5.Pos = p.Pos
+                               q5.As = ASUB
+                               q5.From.Type = obj.TYPE_CONST
+                               q5.From.Offset = 8
+                               q5.Reg = REGSP
+                               q5.To.Type = obj.TYPE_REG
+                               q5.To.Reg = REGFP
+                               q1.Pcond = q5
+                               p = q5
+                       }
                }
        }
 }
index a47e2f93a10e9f463836a34e439f07fdf5048530..ffd1c04d39fcd0bce480a564bc7dacf48a251f60 100644 (file)
@@ -76,7 +76,7 @@ func init() {
 }
 
 func Framepointer_enabled(goos, goarch string) bool {
-       return framepointer_enabled != 0 && goarch == "amd64" && goos != "nacl"
+       return framepointer_enabled != 0 && (goarch == "amd64" && goos != "nacl" || goarch == "arm64" && goos == "linux")
 }
 
 func addexp(s string) {
index 1b6d5d17047129b4712d826e9946c54179c5ec77..ba03cb707b84f74d8115ddee45c6be7a15c53d85 100644 (file)
@@ -1815,6 +1815,10 @@ func (ctxt *Link) dostkcheck() {
        ch.up = nil
 
        ch.limit = objabi.StackLimit - callsize(ctxt)
+       if objabi.GOARCH == "arm64" {
+               // need extra 8 bytes below SP to save FP
+               ch.limit -= 8
+       }
 
        // Check every function, but do the nosplit functions in a first pass,
        // to make the printed failure chains as short as possible.
index af389be9fee336fce1b5badc00edaaa571e0c5cf..6a6a699241dc582112137eaec6d89e0c240043f0 100644 (file)
@@ -39,10 +39,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 #endif
        MOVD    $setg_gcc<>(SB), R1     // arg 1: setg
        MOVD    g, R0                   // arg 0: G
+       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      (R12)
-       MOVD    _cgo_init(SB), R12
-       CMP     $0, R12
-       BEQ     nocgo
+       ADD     $16, RSP
 
 nocgo:
        // update stackguard after _cgo_init
@@ -107,6 +106,7 @@ TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
        MOVD    buf+0(FP), R3
        MOVD    RSP, R0
        MOVD    R0, gobuf_sp(R3)
+       MOVD    R29, gobuf_bp(R3)
        MOVD    LR, gobuf_pc(R3)
        MOVD    g, gobuf_g(R3)
        MOVD    ZR, gobuf_lr(R3)
@@ -128,10 +128,12 @@ TEXT runtime·gogo(SB), NOSPLIT, $24-8
        MOVD    0(g), R4        // make sure g is not nil
        MOVD    gobuf_sp(R5), R0
        MOVD    R0, RSP
+       MOVD    gobuf_bp(R5), R29
        MOVD    gobuf_lr(R5), LR
        MOVD    gobuf_ret(R5), R0
        MOVD    gobuf_ctxt(R5), R26
        MOVD    $0, gobuf_sp(R5)
+       MOVD    $0, gobuf_bp(R5)
        MOVD    $0, gobuf_ret(R5)
        MOVD    $0, gobuf_lr(R5)
        MOVD    $0, gobuf_ctxt(R5)
@@ -147,6 +149,7 @@ TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
        // Save caller state in g->sched
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
+       MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    LR, (g_sched+gobuf_pc)(g)
        MOVD    $0, (g_sched+gobuf_lr)(g)
        MOVD    g, (g_sched+gobuf_g)(g)
@@ -163,6 +166,7 @@ TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
        MOVD    0(R26), R4                      // code pointer
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP // sp = m->g0->sched.sp
+       MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    R3, -8(RSP)
        MOVD    $0, -16(RSP)
        SUB     $16, RSP
@@ -211,6 +215,7 @@ switch:
        MOVD    R6, (g_sched+gobuf_pc)(g)
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
+       MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    $0, (g_sched+gobuf_lr)(g)
        MOVD    g, (g_sched+gobuf_g)(g)
 
@@ -224,6 +229,7 @@ switch:
        MOVD    $runtime·mstart(SB), R4
        MOVD    R4, 0(R3)
        MOVD    R3, RSP
+       MOVD    (g_sched+gobuf_bp)(g), R29
 
        // call target function
        MOVD    0(R26), R3      // code pointer
@@ -235,7 +241,9 @@ switch:
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
+       MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    $0, (g_sched+gobuf_sp)(g)
+       MOVD    $0, (g_sched+gobuf_bp)(g)
        RET
 
 noswitch:
@@ -244,6 +252,7 @@ noswitch:
        // at an intermediate systemstack.
        MOVD    0(R26), R3      // code pointer
        MOVD.P  16(RSP), R30    // restore LR
+       SUB     $8, RSP, R29    // restore FP
        B       (R3)
 
 /*
@@ -278,6 +287,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
        // Set g->sched to context in f
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
+       MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    LR, (g_sched+gobuf_pc)(g)
        MOVD    R3, (g_sched+gobuf_lr)(g)
        MOVD    R26, (g_sched+gobuf_ctxt)(g)
@@ -294,6 +304,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
+       MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD.W  $0, -16(RSP)    // create a call frame on g0 (saved LR; keep 16-aligned)
        BL      runtime·newstack(SB)
 
@@ -843,8 +854,9 @@ TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
 // Save state of caller into g->sched. Smashes R0.
 TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
        MOVD    LR, (g_sched+gobuf_pc)(g)
-       MOVD RSP, R0
+       MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
+       MOVD    R29, (g_sched+gobuf_bp)(g)
        MOVD    $0, (g_sched+gobuf_lr)(g)
        MOVD    $0, (g_sched+gobuf_ret)(g)
        // Assert ctxt is zero. See func save.
@@ -885,6 +897,7 @@ TEXT ·asmcgocall(SB),NOSPLIT,$0-20
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
+       MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    R9, R0
 
        // Now on a scheduling stack (a pthread-created stack).
@@ -996,6 +1009,7 @@ needm:
        MOVD    m_g0(R8), R3
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(R3)
+       MOVD    R29, (g_sched+gobuf_bp)(R3)
 
 havem:
        // Now there's a valid m, and we're running on its m->g0.
@@ -1003,7 +1017,7 @@ havem:
        // Save current sp in m->g0->sched.sp in preparation for
        // switch back to m->curg stack.
        // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
-       // Beware that the frame size is actually 32.
+       // Beware that the frame size is actually 32+16.
        MOVD    m_g0(R8), R3
        MOVD    (g_sched+gobuf_sp)(R3), R4
        MOVD    R4, savedsp-16(SP)
@@ -1030,10 +1044,12 @@ havem:
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
        MOVD    (g_sched+gobuf_pc)(g), R5
-       MOVD    R5, -(24+8)(R4)
+       MOVD    R5, -48(R4)
+       MOVD    (g_sched+gobuf_bp)(g), R5
+       MOVD    R5, -56(R4)
        MOVD    ctxt+24(FP), R0
-       MOVD    R0, -(16+8)(R4)
-       MOVD    $-(24+8)(R4), R0 // maintain 16-byte SP alignment
+       MOVD    R0, -40(R4)
+       MOVD    $-48(R4), R0 // maintain 16-byte SP alignment
        MOVD    R0, RSP
        BL      runtime·cgocallbackg(SB)
 
@@ -1041,7 +1057,7 @@ havem:
        MOVD    0(RSP), R5
        MOVD    R5, (g_sched+gobuf_pc)(g)
        MOVD    RSP, R4
-       ADD     $(24+8), R4, R4
+       ADD     $48, R4, R4
        MOVD    R4, (g_sched+gobuf_sp)(g)
 
        // Switch back to m->g0's stack and restore m->g0->sched.sp.
index c85033f4bccff682a7cbec7fe4f8e9be63118f2f..86bd2fb01c09666f154f5cff082ef0b9410fec1a 100644 (file)
@@ -268,7 +268,8 @@ func cgocallbackg1(ctxt uintptr) {
        case "arm64":
                // On arm64, stack frame is four words and there's a saved LR between
                // SP and the stack frame and between the stack frame and the arguments.
-               cb = (*args)(unsafe.Pointer(sp + 5*sys.PtrSize))
+               // Additional two words (16-byte alignment) are for saving FP.
+               cb = (*args)(unsafe.Pointer(sp + 7*sys.PtrSize))
        case "amd64":
                // On amd64, stack frame is two words, plus caller PC.
                if framepointer_enabled {
index d039a8e0abab274413b1fe521b08f6d33f287cfc..e3972f492429af201dd1af77b04a6b2c97ddef07 100644 (file)
@@ -49,7 +49,9 @@ TEXT _rt0_arm64_darwin_lib(SB),NOSPLIT,$168
        MOVD  _cgo_sys_thread_create(SB), R4
        MOVD  $_rt0_arm64_darwin_lib_go(SB), R0
        MOVD  $0, R1
+       SUB   $16, RSP          // reserve 16 bytes for sp-8 where fp may be saved.
        BL    (R4)
+       ADD   $16, RSP
 
        // Restore callee-save registers.
        MOVD 24(RSP), R19
index 458f082159dae9c99a698a352a9510e14d365a35..a6bc99df569309e056a5fc7d4308da63b52cdc92 100644 (file)
@@ -48,7 +48,9 @@ TEXT _rt0_arm64_linux_lib(SB),NOSPLIT,$184
        BEQ     nocgo
        MOVD    $_rt0_arm64_linux_lib_go(SB), R0
        MOVD    $0, R1
+       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      (R4)
+       ADD     $16, RSP
        B       restore
 
 nocgo:
index c6afd76a657e27e45f404b6c97aa4871c180917c..1c8fce3db649b027d5ae60dd606534eeab6ef454 100644 (file)
@@ -239,7 +239,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$24-8
        MOVD    (g_sched+gobuf_sp)(R3), R1      // Set RSP to g0 stack
 
 noswitch:
-       SUB     $16, R1
+       SUB     $32, R1
        BIC     $15, R1
        MOVD    R1, RSP
 
@@ -298,7 +298,9 @@ TEXT runtime·callCgoSigaction(SB),NOSPLIT,$0
        MOVD    new+8(FP), R1
        MOVD    old+16(FP), R2
        MOVD     _cgo_sigaction(SB), R3
+       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      R3
+       ADD     $16, RSP
        MOVW    R0, ret+24(FP)
        RET
 
@@ -361,7 +363,9 @@ TEXT runtime·callCgoMmap(SB),NOSPLIT,$0
        MOVW    fd+24(FP), R4
        MOVW    off+28(FP), R5
        MOVD    _cgo_mmap(SB), R9
+       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      R9
+       ADD     $16, RSP
        MOVD    R0, ret+32(FP)
        RET
 
@@ -382,7 +386,9 @@ TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0
        MOVD    addr+0(FP), R0
        MOVD    n+8(FP), R1
        MOVD    _cgo_munmap(SB), R9
+       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      R9
+       ADD     $16, RSP
        RET
 
 TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
index 8370fd75937edde53c1126e5f1ee636df09d976b..4c2010493a76a8da0e51cbf82bfcdfd0cac8568e 100644 (file)
@@ -285,7 +285,7 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in
 
                // If framepointer_enabled and there's a frame, then
                // there's a saved bp here.
-               if framepointer_enabled && GOARCH == "amd64" && frame.varp > frame.sp {
+               if frame.varp > frame.sp && (framepointer_enabled && GOARCH == "amd64" || GOARCH == "arm64") {
                        frame.varp -= sys.RegSize
                }
 
index 7e12dbc0eb6bbe0af61e3c3d34df2a8137c44050..0f2f6178c7f4cbcc046d5336c0fa380e4104338a 100644 (file)
@@ -16,7 +16,7 @@ import "runtime"
 // 386:"TEXT\t.*, [$]0-"
 // amd64:"TEXT\t.*, [$]0-"
 // arm:"TEXT\t.*, [$]-4-"
-// arm64:"TEXT\t.*, [$]-8-"
+// arm64:"TEXT\t.*, [$]0-"
 // mips:"TEXT\t.*, [$]-4-"
 // ppc64le:"TEXT\t.*, [$]0-"
 // s390x:"TEXT\t.*, [$]0-"
@@ -35,7 +35,7 @@ type T struct {
 // 386:"TEXT\t.*, [$]0-"
 // amd64:"TEXT\t.*, [$]0-"
 // arm:"TEXT\t.*, [$]0-" (spills return address)
-// arm64:"TEXT\t.*, [$]-8-"
+// arm64:"TEXT\t.*, [$]0-"
 // mips:"TEXT\t.*, [$]-4-"
 // ppc64le:"TEXT\t.*, [$]0-"
 // s390x:"TEXT\t.*, [$]0-"
@@ -50,7 +50,7 @@ func ZeroLargeStruct(x *T) {
 // - 386 fails due to spilling a register
 // amd64:"TEXT\t.*, [$]0-"
 // arm:"TEXT\t.*, [$]0-" (spills return address)
-// arm64:"TEXT\t.*, [$]-8-"
+// arm64:"TEXT\t.*, [$]0-"
 // ppc64le:"TEXT\t.*, [$]0-"
 // s390x:"TEXT\t.*, [$]0-"
 // Note: that 386 currently has to spill a register.
@@ -64,7 +64,7 @@ func KeepWanted(t *T) {
 // - 386 fails due to spilling a register
 // - arm & mips fail due to softfloat calls
 // amd64:"TEXT\t.*, [$]0-"
-// arm64:"TEXT\t.*, [$]-8-"
+// arm64:"TEXT\t.*, [$]0-"
 // ppc64le:"TEXT\t.*, [$]0-"
 // s390x:"TEXT\t.*, [$]0-"
 func ArrayAdd64(a, b [4]float64) [4]float64 {
@@ -76,7 +76,7 @@ func ArrayAdd64(a, b [4]float64) [4]float64 {
 // 386:"TEXT\t.*, [$]0-"
 // amd64:"TEXT\t.*, [$]0-"
 // arm:"TEXT\t.*, [$]0-" (spills return address)
-// arm64:"TEXT\t.*, [$]-8-"
+// arm64:"TEXT\t.*, [$]0-"
 // mips:"TEXT\t.*, [$]-4-"
 // ppc64le:"TEXT\t.*, [$]0-"
 // s390x:"TEXT\t.*, [$]0-"
index e6cd04e563060ddb94a04d9356a6f99e5f01b322..8b61c9e96d1429182e69efbfd7bedefe8d56a3b9 100644 (file)
@@ -118,11 +118,11 @@ main 136
 # (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.)
 main 96 nosplit
 main 100 nosplit; REJECT ppc64 ppc64le
-main 104 nosplit; REJECT ppc64 ppc64le
+main 104 nosplit; REJECT ppc64 ppc64le arm64
 main 108 nosplit; REJECT ppc64 ppc64le
-main 112 nosplit; REJECT ppc64 ppc64le
+main 112 nosplit; REJECT ppc64 ppc64le arm64
 main 116 nosplit; REJECT ppc64 ppc64le
-main 120 nosplit; REJECT ppc64 ppc64le amd64
+main 120 nosplit; REJECT ppc64 ppc64le amd64 arm64
 main 124 nosplit; REJECT ppc64 ppc64le amd64
 main 128 nosplit; REJECT
 main 132 nosplit; REJECT
@@ -136,11 +136,11 @@ main 136 nosplit; REJECT
 # Because AMD64 uses frame pointer, it has 8 fewer bytes.
 main 96 nosplit call f; f 0 nosplit
 main 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
-main 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
+main 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64
 main 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
-main 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
+main 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
 main 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
-main 120 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
+main 124 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
 main 124 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 386
 main 128 nosplit call f; f 0 nosplit; REJECT
 main 132 nosplit call f; f 0 nosplit; REJECT
@@ -152,11 +152,11 @@ main 136 nosplit call f; f 0 nosplit; REJECT
 # Architectures differ in the same way as before.
 main 96 nosplit call f; f 0 call f
 main 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le
-main 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
+main 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
 main 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
-main 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
+main 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
 main 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
-main 120 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386
+main 120 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386 arm64
 main 124 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386
 main 128 nosplit call f; f 0 call f; REJECT
 main 132 nosplit call f; f 0 call f; REJECT
@@ -165,11 +165,11 @@ main 136 nosplit call f; f 0 call f; REJECT
 # Indirect calls are assumed to be splitting functions.
 main 96 nosplit callind
 main 100 nosplit callind; REJECT ppc64 ppc64le
-main 104 nosplit callind; REJECT ppc64 ppc64le amd64
+main 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
 main 108 nosplit callind; REJECT ppc64 ppc64le amd64
-main 112 nosplit callind; REJECT ppc64 ppc64le amd64
+main 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
 main 116 nosplit callind; REJECT ppc64 ppc64le amd64
-main 120 nosplit callind; REJECT ppc64 ppc64le amd64 386
+main 120 nosplit callind; REJECT ppc64 ppc64le amd64 386 arm64
 main 124 nosplit callind; REJECT ppc64 ppc64le amd64 386
 main 128 nosplit callind; REJECT
 main 132 nosplit callind; REJECT
@@ -319,7 +319,7 @@ TestCases:
                                        }
                                }
 
-                               if size%ptrSize == 4 || goarch == "arm64" && size != 0 && (size+8)%16 != 0 {
+                               if size%ptrSize == 4 {
                                        continue TestCases
                                }
                                nosplit := m[3]