Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: redo arm64 LR/FP save and restore
author Keith Randall <khr@golang.org>
Sat, 17 May 2025 22:05:56 +0000 (15:05 -0700)
committer Keith Randall <khr@golang.org>
Mon, 6 Oct 2025 21:11:41 +0000 (14:11 -0700)
Instead of storing LR (the return address) at 0(SP) and the FP
(parent's frame pointer) at -8(SP), store them at framesize-8(SP)
and framesize-16(SP), respectively.

We push and pop data onto the stack such that we're never accessing
anything below SP.

The prolog/epilog lengths are unchanged (3 insns for a typical prolog,
2 for a typical epilog).

We use 8 bytes more per frame.

Typical prologue:

    STP.W   (FP, LR), -16(SP)
    MOVD    SP, FP
    SUB     $C, SP

Typical epilogue:

    ADD     $C, SP
    LDP.P   16(SP), (FP, LR)
    RET

The previous word where we stored LR, at 0(SP), is now unused.
We could repurpose that slot for storing a local variable.

The new prolog and epilog instructions are recognized by libunwind,
so pc-sampling tools like perf should now be accurate. (TODO: except
maybe after the first RET instruction? Have to look into that.)

Update #73753 (fixes, for arm64)
Update #57302 (Quim thinks this will help on that issue)

Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/674615
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
23 files changed:
src/cmd/compile/abi-internal.md
src/cmd/compile/internal/arm64/ggen.go
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssagen/pgen.go
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/obj/arm64/obj7.go
src/cmd/link/internal/amd64/obj.go
src/cmd/link/internal/arm64/obj.go
src/cmd/link/internal/ld/dwarf.go
src/cmd/link/internal/ld/lib.go
src/cmd/link/internal/ld/stackcheck.go
src/cmd/link/internal/x86/obj.go
src/runtime/asm_arm64.s
src/runtime/mkpreempt.go
src/runtime/panic.go
src/runtime/preempt_arm64.s
src/runtime/race_arm64.s
src/runtime/signal_arm64.go
src/runtime/stack.go
src/runtime/testdata/testprog/badtraceback.go
src/runtime/traceback.go
test/nosplit.go

index eae230dc070d86964125ee2472f9850c091c4e4e..490e1affb74de9fb44431c6cf54d9cdded315c03 100644 (file)
@@ -576,19 +576,19 @@ A function's stack frame, after the frame is created, is laid out as
 follows:
 
     +------------------------------+
+    | return PC                    |
+    | frame pointer on entry       | ← R29 points to
     | ... locals ...               |
     | ... outgoing arguments ...   |
-    | return PC                    | ← RSP points to
-    | frame pointer on entry       |
+    | unused word                  | ← RSP points to
     +------------------------------+ ↓ lower addresses
 
 The "return PC" is loaded to the link register, R30, as part of the
 arm64 `CALL` operation.
 
-On entry, a function subtracts from RSP to open its stack frame, and
-saves the values of R30 and R29 at the bottom of the frame.
-Specifically, R30 is saved at 0(RSP) and R29 is saved at -8(RSP),
-after RSP is updated.
+On entry, a function pushes R30 (the return address) and R29
+(the caller's frame pointer) onto the bottom of the stack. It then
+subtracts a constant from RSP to open its stack frame.
 
 A leaf function that does not require any stack space may omit the
 saved R30 and R29.
index 14027467002a3ebb61efcfcb3be0c79ee25f7d9f..6ba56b992eeda6bde124609d7ff4d8f450e2e5e0 100644 (file)
@@ -11,10 +11,12 @@ import (
 )
 
 func padframe(frame int64) int64 {
-       // arm64 requires that the frame size (not counting saved FP&LR)
-       // be 16 bytes aligned. If not, pad it.
-       if frame%16 != 0 {
-               frame += 16 - (frame % 16)
+       // arm64 requires frame sizes here that are 8 mod 16.
+       // With the additional (unused) slot at the bottom of the frame,
+       // that makes an aligned 16 byte frame.
+       // Adding a save region for LR+FP does not change the alignment.
+       if frame != 0 {
+               frame += (-(frame + 8)) & 15
        }
        return frame
 }
index 7bc0e536e941e645c44b32223f76e03248893c54..9f79a740c6ca81c815d95838e018e269133a9859 100644 (file)
@@ -221,7 +221,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 
                for i := 0; i < len(args); i++ {
                        a := args[i]
-                       // Offset by size of the saved LR slot.
+                       // Offset by size of the unused slot before start of args.
                        addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
                        // Look for double-register operations if we can.
                        if i < len(args)-1 {
index 0a2010363f8d0497881110aaee4c2f48cb3bf802..f0776172b9264af96a285320fe4e04a49d7ab96d 100644 (file)
@@ -393,10 +393,16 @@ func StackOffset(slot ssa.LocalSlot) int32 {
        case ir.PAUTO:
                off = n.FrameOffset()
                if base.Ctxt.Arch.FixedFrameSize == 0 {
+                       // x86 return address
                        off -= int64(types.PtrSize)
                }
                if buildcfg.FramePointerEnabled {
+                       // frame pointer
                        off -= int64(types.PtrSize)
+                       if buildcfg.GOARCH == "arm64" {
+                               // arm64 return address also
+                               off -= int64(types.PtrSize)
+                       }
                }
        }
        return int32(off + slot.Off)
index 1e2159579dfbf2e46167f731ed79d22b896aae38..107447f04cc4f6ed96b214bf45a2f505ee7d9e41 100644 (file)
@@ -7150,6 +7150,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) {
        // Insert code to zero ambiguously live variables so that the
        // garbage collector only sees initialized values when it
        // looks for pointers.
+       // Note: lo/hi are offsets from varp and will be negative.
        var lo, hi int64
 
        // Opaque state for backend to use. Current backends use it to
@@ -7157,7 +7158,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) {
        var state uint32
 
        // Iterate through declarations. Autos are sorted in decreasing
-       // frame offset order.
+       // frame offset order (least negative to most negative).
        for _, n := range e.curfn.Dcl {
                if !n.Needzero() {
                        continue
index 743d09a319087dfdd3e4fff57bd1e4dfa2cf9db6..281d705a3eb6145a25d0e1d03589aebfd4e37316 100644 (file)
@@ -51,7 +51,6 @@ type ctxt7 struct {
        blitrl     *obj.Prog
        elitrl     *obj.Prog
        autosize   int32
-       extrasize  int32
        instoffset int64
        pc         int64
        pool       struct {
@@ -1122,8 +1121,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
        }
 
-       c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
-       p.To.Offset &= 0xffffffff // extrasize is no longer needed
+       c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)}
 
        // Process literal pool and allocate initial program counter for each Prog, before
        // generating branch veneers.
@@ -2119,8 +2117,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
                                // a.Offset is still relative to pseudo-SP.
                                a.Reg = obj.REG_NONE
                        }
-                       // The frame top 8 or 16 bytes are for FP
-                       c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
+                       // The frame top 16 bytes are for LR/FP
+                       c.instoffset = int64(c.autosize) + a.Offset - extrasize
                        return autoclass(c.instoffset)
 
                case obj.NAME_PARAM:
@@ -2180,8 +2178,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
                                // a.Offset is still relative to pseudo-SP.
                                a.Reg = obj.REG_NONE
                        }
-                       // The frame top 8 or 16 bytes are for FP
-                       c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
+                       // The frame top 16 bytes are for LR/FP
+                       c.instoffset = int64(c.autosize) + a.Offset - extrasize
 
                case obj.NAME_PARAM:
                        if a.Reg == REGSP {
index 2583e46354292f7f9327ff0e5c6c7aa70fa96a1b..a697426145185b45b1f0cbabf9d57fe7ddd656fd 100644 (file)
@@ -36,7 +36,6 @@ import (
        "cmd/internal/src"
        "cmd/internal/sys"
        "internal/abi"
-       "internal/buildcfg"
        "log"
        "math"
 )
@@ -472,6 +471,8 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
        obj.Nopout(p)
 }
 
+const extrasize = 16 // space needed in the frame for LR+FP
+
 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
        if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
                return
@@ -521,33 +522,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        c.autosize = int32(textstksiz)
 
                        if p.Mark&LEAF != 0 && c.autosize == 0 {
-                               // A leaf function with no locals has no frame.
+                               // A leaf function with no locals needs no frame.
                                p.From.Sym.Set(obj.AttrNoFrame, true)
                        }
 
                        if !p.From.Sym.NoFrame() {
                                // If there is a stack frame at all, it includes
-                               // space to save the LR.
+                               // space for the (now unused) word at [SP:SP+8].
                                c.autosize += 8
                        }
 
+                       // Round up to a multiple of 16.
+                       c.autosize += (-c.autosize) & 15
+
                        if c.autosize != 0 {
-                               extrasize := int32(0)
-                               if c.autosize%16 == 8 {
-                                       // Allocate extra 8 bytes on the frame top to save FP
-                                       extrasize = 8
-                               } else if c.autosize&(16-1) == 0 {
-                                       // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
-                                       extrasize = 16
-                               } else {
-                                       c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
-                               }
+                               // Allocate an extra 16 bytes at the top of the frame
+                               // to save LR+FP.
                                c.autosize += extrasize
                                c.cursym.Func().Locals += extrasize
 
-                               // low 32 bits for autosize
-                               // high 32 bits for extrasize
-                               p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
+                               p.To.Offset = int64(c.autosize)
                        } else {
                                // NOFRAME
                                p.To.Offset = 0
@@ -580,120 +574,72 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        var prologueEnd *obj.Prog
 
                        aoffset := c.autosize
-                       if aoffset > 0xf0 {
-                               // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
-                               // so the maximum aoffset value is 0xf0.
-                               aoffset = 0xf0
+                       if aoffset < 16 {
+                               log.Fatalf("aoffset too small %d", aoffset)
                        }
 
-                       // Frame is non-empty. Make sure to save link register, even if
-                       // it is a leaf function, so that traceback works.
                        q = p
-                       if c.autosize > aoffset {
-                               // Frame size is too large for a MOVD.W instruction. Store the frame pointer
-                               // register and link register before decrementing SP, so if a signal comes
-                               // during the execution of the function prologue, the traceback code will
-                               // not see a half-updated stack frame.
-
-                               // SUB $autosize, RSP, R20
-                               q1 = obj.Appendp(q, c.newprog)
-                               q1.Pos = p.Pos
-                               q1.As = ASUB
-                               q1.From.Type = obj.TYPE_CONST
-                               q1.From.Offset = int64(c.autosize)
-                               q1.Reg = REGSP
-                               q1.To.Type = obj.TYPE_REG
-                               q1.To.Reg = REG_R20
-
-                               prologueEnd = q1
-
-                               // STP (R29, R30), -8(R20)
-                               q1 = obj.Appendp(q1, c.newprog)
-                               q1.Pos = p.Pos
-                               q1.As = ASTP
-                               q1.From.Type = obj.TYPE_REGREG
-                               q1.From.Reg = REGFP
-                               q1.From.Offset = REGLINK
-                               q1.To.Type = obj.TYPE_MEM
-                               q1.To.Reg = REG_R20
-                               q1.To.Offset = -8
-
-                               // This is not async preemptible, as if we open a frame
-                               // at the current SP, it will clobber the saved LR.
-                               q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
-
-                               // MOVD R20, RSP
-                               q1 = obj.Appendp(q1, c.newprog)
-                               q1.Pos = p.Pos
-                               q1.As = AMOVD
-                               q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REG_R20
-                               q1.To.Type = obj.TYPE_REG
-                               q1.To.Reg = REGSP
-                               q1.Spadj = c.autosize
-
-                               q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
-
-                               if buildcfg.GOOS == "ios" {
-                                       // iOS does not support SA_ONSTACK. We will run the signal handler
-                                       // on the G stack. If we write below SP, it may be clobbered by
-                                       // the signal handler. So we save FP and LR after decrementing SP.
-                                       // STP (R29, R30), -8(RSP)
+
+                       // Store return address and frame pointer at the top of the stack frame.
+                       // STP.W (R29, R30), -16(SP)
+                       q1 = obj.Appendp(q, c.newprog)
+                       q1.Pos = p.Pos
+                       q1.As = ASTP
+                       q1.From.Type = obj.TYPE_REGREG
+                       q1.From.Reg = REGFP
+                       q1.From.Offset = REGLINK
+                       q1.To.Type = obj.TYPE_MEM
+                       q1.To.Reg = REG_RSP
+                       q1.To.Offset = -16
+                       q1.Scond = C_XPRE
+
+                       prologueEnd = q1
+
+                       // Update frame pointer
+                       q1 = obj.Appendp(q1, c.newprog)
+                       q1.Pos = p.Pos
+                       q1.As = AMOVD
+                       q1.From.Type = obj.TYPE_REG
+                       q1.From.Reg = REGSP
+                       q1.To.Type = obj.TYPE_REG
+                       q1.To.Reg = REGFP
+
+                       // Allocate additional frame space.
+                       adj := aoffset - 16
+                       if adj > 0 {
+                               // SUB $autosize-16, RSP
+                               if adj < 1<<12 {
+                                       q1 = obj.Appendp(q1, c.newprog)
+                                       q1.Pos = p.Pos
+                                       q1.As = ASUB
+                                       q1.From.Type = obj.TYPE_CONST
+                                       q1.From.Offset = int64(adj)
+                                       q1.To.Type = obj.TYPE_REG
+                                       q1.To.Reg = REGSP
+                               } else {
+                                       // Constant too big for atomic subtract.
+                                       // Materialize in tmp register first.
+                                       q1 = obj.Appendp(q1, c.newprog)
+                                       q1.Pos = p.Pos
+                                       q1.As = AMOVD
+                                       q1.From.Type = obj.TYPE_CONST
+                                       q1.From.Offset = int64(adj)
+                                       q1.To.Type = obj.TYPE_REG
+                                       q1.To.Reg = REGTMP
+
                                        q1 = obj.Appendp(q1, c.newprog)
                                        q1.Pos = p.Pos
-                                       q1.As = ASTP
-                                       q1.From.Type = obj.TYPE_REGREG
-                                       q1.From.Reg = REGFP
-                                       q1.From.Offset = REGLINK
-                                       q1.To.Type = obj.TYPE_MEM
+                                       q1.As = ASUB
+                                       q1.From.Type = obj.TYPE_REG
+                                       q1.From.Reg = REGTMP
+                                       q1.To.Type = obj.TYPE_REG
                                        q1.To.Reg = REGSP
-                                       q1.To.Offset = -8
                                }
-                       } else {
-                               // small frame, update SP and save LR in a single MOVD.W instruction.
-                               // So if a signal comes during the execution of the function prologue,
-                               // the traceback code will not see a half-updated stack frame.
-                               // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
-                               // early on before the signal stack is set, as glibc doesn't allow
-                               // us to block SIGSETXID. So it is important that we don't write below
-                               // the SP until the signal stack is set.
-                               // Luckily, all the functions from thread entry to setting the signal
-                               // stack have small frames.
-                               q1 = obj.Appendp(q, c.newprog)
-                               q1.As = AMOVD
-                               q1.Pos = p.Pos
-                               q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REGLINK
-                               q1.To.Type = obj.TYPE_MEM
-                               q1.Scond = C_XPRE
-                               q1.To.Offset = int64(-aoffset)
-                               q1.To.Reg = REGSP
-                               q1.Spadj = aoffset
-
-                               prologueEnd = q1
-
-                               // Frame pointer.
-                               q1 = obj.Appendp(q1, c.newprog)
-                               q1.Pos = p.Pos
-                               q1.As = AMOVD
-                               q1.From.Type = obj.TYPE_REG
-                               q1.From.Reg = REGFP
-                               q1.To.Type = obj.TYPE_MEM
-                               q1.To.Reg = REGSP
-                               q1.To.Offset = -8
+                               q1.Spadj = adj
                        }
 
                        prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
 
-                       q1 = obj.Appendp(q1, c.newprog)
-                       q1.Pos = p.Pos
-                       q1.As = ASUB
-                       q1.From.Type = obj.TYPE_CONST
-                       q1.From.Offset = 8
-                       q1.Reg = REGSP
-                       q1.To.Type = obj.TYPE_REG
-                       q1.To.Reg = REGFP
-
                case obj.ARET:
                        nocache(p)
                        if p.From.Type == obj.TYPE_CONST {
@@ -707,105 +653,56 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        }
                        p.To = obj.Addr{}
                        aoffset := c.autosize
-                       if c.cursym.Func().Text.Mark&LEAF != 0 {
-                               if aoffset != 0 {
-                                       // Restore frame pointer.
-                                       // ADD $framesize-8, RSP, R29
-                                       p.As = AADD
-                                       p.From.Type = obj.TYPE_CONST
-                                       p.From.Offset = int64(c.autosize) - 8
-                                       p.Reg = REGSP
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGFP
-
-                                       // Pop stack frame.
-                                       // ADD $framesize, RSP, RSP
-                                       p = obj.Appendp(p, c.newprog)
-                                       p.As = AADD
-                                       p.From.Type = obj.TYPE_CONST
-                                       p.From.Offset = int64(c.autosize)
-                                       p.To.Type = obj.TYPE_REG
-                                       p.To.Reg = REGSP
-                                       p.Spadj = -c.autosize
+                       if aoffset > 0 {
+                               if aoffset < 16 {
+                                       log.Fatalf("aoffset too small %d", aoffset)
+                               }
+                               adj := aoffset - 16
+                               if adj > 0 {
+                                       if adj < 1<<12 {
+                                               // ADD $adj, RSP, RSP
+                                               p.As = AADD
+                                               p.From.Type = obj.TYPE_CONST
+                                               p.From.Offset = int64(adj)
+                                               p.To.Type = obj.TYPE_REG
+                                               p.To.Reg = REGSP
+                                       } else {
+                                               // Put frame size in a separate register and
+                                               // add it in with a single instruction,
+                                               // so we never have a partial frame during
+                                               // the epilog. See issue 73259.
+
+                                               // MOVD $adj, REGTMP
+                                               p.As = AMOVD
+                                               p.From.Type = obj.TYPE_CONST
+                                               p.From.Offset = int64(adj)
+                                               p.To.Type = obj.TYPE_REG
+                                               p.To.Reg = REGTMP
+                                               // ADD REGTMP, RSP, RSP
+                                               p = obj.Appendp(p, c.newprog)
+                                               p.As = AADD
+                                               p.From.Type = obj.TYPE_REG
+                                               p.From.Reg = REGTMP
+                                               p.To.Type = obj.TYPE_REG
+                                               p.To.Reg = REGSP
+                                       }
+                                       p.Spadj = -adj
                                }
-                       } else if aoffset <= 0xF0 {
-                               // small frame, restore LR and update SP in a single MOVD.P instruction.
-                               // There is no correctness issue to use a single LDP for LR and FP,
-                               // but the instructions are not pattern matched with the prologue's
-                               // MOVD.W and MOVD, which may cause performance issue in
-                               // store-forwarding.
-
-                               // MOVD -8(RSP), R29
-                               p.As = AMOVD
-                               p.From.Type = obj.TYPE_MEM
-                               p.From.Reg = REGSP
-                               p.From.Offset = -8
-                               p.To.Type = obj.TYPE_REG
-                               p.To.Reg = REGFP
-                               p = obj.Appendp(p, c.newprog)
 
-                               // MOVD.P offset(RSP), R30
-                               p.As = AMOVD
-                               p.From.Type = obj.TYPE_MEM
-                               p.Scond = C_XPOST
-                               p.From.Offset = int64(aoffset)
-                               p.From.Reg = REGSP
-                               p.To.Type = obj.TYPE_REG
-                               p.To.Reg = REGLINK
-                               p.Spadj = -aoffset
-                       } else {
-                               // LDP -8(RSP), (R29, R30)
+                               // Pop LR+FP.
+                               // LDP.P 16(RSP), (R29, R30)
+                               if p.As != obj.ARET {
+                                       p = obj.Appendp(p, c.newprog)
+                               }
                                p.As = ALDP
                                p.From.Type = obj.TYPE_MEM
-                               p.From.Offset = -8
                                p.From.Reg = REGSP
+                               p.From.Offset = 16
+                               p.Scond = C_XPOST
                                p.To.Type = obj.TYPE_REGREG
                                p.To.Reg = REGFP
                                p.To.Offset = REGLINK
-
-                               if aoffset < 1<<12 {
-                                       // ADD $aoffset, RSP, RSP
-                                       q = newprog()
-                                       q.As = AADD
-                                       q.From.Type = obj.TYPE_CONST
-                                       q.From.Offset = int64(aoffset)
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGSP
-                                       q.Spadj = -aoffset
-                                       q.Pos = p.Pos
-                                       q.Link = p.Link
-                                       p.Link = q
-                                       p = q
-                               } else {
-                                       // Put frame size in a separate register and
-                                       // add it in with a single instruction,
-                                       // so we never have a partial frame during
-                                       // the epilog. See issue 73259.
-
-                                       // MOVD $aoffset, REGTMP
-                                       q = newprog()
-                                       q.As = AMOVD
-                                       q.From.Type = obj.TYPE_CONST
-                                       q.From.Offset = int64(aoffset)
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGTMP
-                                       q.Pos = p.Pos
-                                       q.Link = p.Link
-                                       p.Link = q
-                                       p = q
-                                       // ADD REGTMP, RSP, RSP
-                                       q = newprog()
-                                       q.As = AADD
-                                       q.From.Type = obj.TYPE_REG
-                                       q.From.Reg = REGTMP
-                                       q.To.Type = obj.TYPE_REG
-                                       q.To.Reg = REGSP
-                                       q.Spadj = -aoffset
-                                       q.Pos = p.Pos
-                                       q.Link = p.Link
-                                       p.Link = q
-                                       p = q
-                               }
+                               p.Spadj = -16
                        }
 
                        // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
@@ -868,10 +765,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                p.From.Type = obj.TYPE_REG
                                p.From.Reg = REGLINK
                        } else {
-                               /* MOVD (RSP), Rd */
+                               /* MOVD framesize-8(RSP), Rd */
                                p.As = AMOVD
                                p.From.Type = obj.TYPE_MEM
                                p.From.Reg = REGSP
+                               p.From.Offset = int64(c.autosize - 8)
                        }
                }
                if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
@@ -906,6 +804,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                        p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
                        p.From.Offset = 0
                }
+               if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
+                       p.Spadj += int32(-p.To.Offset)
+               }
+               if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
+                       p.Spadj += int32(-p.From.Offset)
+               }
        }
 }
 
index 3a6141b9091eb6068821c9bfbcea3265491bd0a1..761496549f93aaa8addeca55d51f040163ba6e2f 100644 (file)
@@ -51,15 +51,16 @@ func Init() (*sys.Arch, ld.Arch) {
                Plan9Magic:  uint32(4*26*26 + 7),
                Plan9_64Bit: true,
 
-               Adddynrel:        adddynrel,
-               Archinit:         archinit,
-               Archreloc:        archreloc,
-               Archrelocvariant: archrelocvariant,
-               Gentext:          gentext,
-               Machoreloc1:      machoreloc1,
-               MachorelocSize:   8,
-               PEreloc1:         pereloc1,
-               TLSIEtoLE:        tlsIEtoLE,
+               Adddynrel:                 adddynrel,
+               Archinit:                  archinit,
+               Archreloc:                 archreloc,
+               Archrelocvariant:          archrelocvariant,
+               Gentext:                   gentext,
+               Machoreloc1:               machoreloc1,
+               MachorelocSize:            8,
+               PEreloc1:                  pereloc1,
+               TLSIEtoLE:                 tlsIEtoLE,
+               ReturnAddressAtTopOfFrame: true,
 
                ELF: ld.ELFArch{
                        Linuxdynld:     "/lib64/ld-linux-x86-64.so.2",
index 3d358155badbca1e07d63b55dd4f7c790c48ace1..e1e4ade81835c6eb945cf52390d2cdaee123a968 100644 (file)
@@ -47,17 +47,18 @@ func Init() (*sys.Arch, ld.Arch) {
                Dwarfreglr: dwarfRegLR,
                TrampLimit: 0x7c00000, // 26-bit signed offset * 4, leave room for PLT etc.
 
-               Adddynrel:        adddynrel,
-               Archinit:         archinit,
-               Archreloc:        archreloc,
-               Archrelocvariant: archrelocvariant,
-               Extreloc:         extreloc,
-               Gentext:          gentext,
-               GenSymsLate:      gensymlate,
-               Machoreloc1:      machoreloc1,
-               MachorelocSize:   8,
-               PEreloc1:         pereloc1,
-               Trampoline:       trampoline,
+               Adddynrel:                 adddynrel,
+               Archinit:                  archinit,
+               Archreloc:                 archreloc,
+               Archrelocvariant:          archrelocvariant,
+               Extreloc:                  extreloc,
+               Gentext:                   gentext,
+               GenSymsLate:               gensymlate,
+               Machoreloc1:               machoreloc1,
+               MachorelocSize:            8,
+               PEreloc1:                  pereloc1,
+               Trampoline:                trampoline,
+               ReturnAddressAtTopOfFrame: true,
 
                ELF: ld.ELFArch{
                        Androiddynld:   "/system/bin/linker64",
index 0003938ef2e03665e0f9e22b8524351d9b00c2a9..c4d12a5488df3c7f6783f6cb3dd9215889be8309 100644 (file)
@@ -1544,9 +1544,14 @@ func (d *dwctxt) writeframes(fs loader.Sym) dwarfSecInfo {
                                if pcsp.Value > 0 {
                                        // The return address is preserved at (CFA-frame_size)
                                        // after a stack frame has been allocated.
+                                       off := -spdelta
+                                       if thearch.ReturnAddressAtTopOfFrame {
+                                               // Except on arm64, which stores it at the top of the frame.
+                                               off = -int64(d.arch.PtrSize)
+                                       }
                                        deltaBuf = append(deltaBuf, dwarf.DW_CFA_offset_extended_sf)
                                        deltaBuf = dwarf.AppendUleb128(deltaBuf, uint64(thearch.Dwarfreglr))
-                                       deltaBuf = dwarf.AppendSleb128(deltaBuf, -spdelta/dataAlignmentFactor)
+                                       deltaBuf = dwarf.AppendSleb128(deltaBuf, off/dataAlignmentFactor)
                                } else {
                                        // The return address is restored into the link register
                                        // when a stack frame has been de-allocated.
index 2c861129b52f9abb00b7b7feef3600acd9ae4484..5f5ebfc1d9855cc2fca5f05e2aa741fc5b697c88 100644 (file)
@@ -263,6 +263,10 @@ type Arch struct {
        // optional override for assignAddress
        AssignAddress func(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64)
 
+       // Reports whether the return address is stored at the top (highest address)
+       // of the stack frame.
+       ReturnAddressAtTopOfFrame bool
+
        // ELF specific information.
        ELF ELFArch
 }
index 98e7edaeb1477b4d5c8136e51954f961b778639d..14cd3a22384f885fe5b3030d69897babebce4ca4 100644 (file)
@@ -9,7 +9,6 @@ import (
        "cmd/internal/objabi"
        "cmd/link/internal/loader"
        "fmt"
-       "internal/buildcfg"
        "sort"
        "strings"
 )
@@ -62,10 +61,6 @@ func (ctxt *Link) doStackCheck() {
        // that there are at least StackLimit bytes available below SP
        // when morestack returns.
        limit := objabi.StackNosplit(*flagRace) - sc.callSize
-       if buildcfg.GOARCH == "arm64" {
-               // Need an extra 8 bytes below SP to save FP.
-               limit -= 8
-       }
 
        // Compute stack heights without any back-tracking information.
        // This will almost certainly succeed and we can simply
index 4336f01ea3d53677dff711a7627c090dc556f712..a4885fde8fd06f4abdebe3d99ab490abf1aaf3ab 100644 (file)
@@ -50,13 +50,14 @@ func Init() (*sys.Arch, ld.Arch) {
 
                Plan9Magic: uint32(4*11*11 + 7),
 
-               Adddynrel:        adddynrel,
-               Archinit:         archinit,
-               Archreloc:        archreloc,
-               Archrelocvariant: archrelocvariant,
-               Gentext:          gentext,
-               Machoreloc1:      machoreloc1,
-               PEreloc1:         pereloc1,
+               Adddynrel:                 adddynrel,
+               Archinit:                  archinit,
+               Archreloc:                 archreloc,
+               Archrelocvariant:          archrelocvariant,
+               Gentext:                   gentext,
+               Machoreloc1:               machoreloc1,
+               PEreloc1:                  pereloc1,
+               ReturnAddressAtTopOfFrame: true,
 
                ELF: ld.ELFArch{
                        Linuxdynld:     "/lib/ld-linux.so.2",
index a0e82ec830f74b58a0a82641884559b81103f0d7..aa49a27a75d1e34f29414679598c7b98691d773a 100644 (file)
@@ -50,9 +50,7 @@ TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
        CBZ     R4, nocgo
        MOVD    $_rt0_arm64_lib_go(SB), R0
        MOVD    $0, R1
-       SUB     $16, RSP                // reserve 16 bytes for sp-8 where fp may be saved.
        BL      (R4)
-       ADD     $16, RSP
        B       restore
 
 nocgo:
@@ -371,7 +369,6 @@ switch:
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R0
        MOVD    R0, RSP
-       MOVD    (g_sched+gobuf_bp)(g), R29
        MOVD    $0, (g_sched+gobuf_sp)(g)
        MOVD    $0, (g_sched+gobuf_bp)(g)
        RET
@@ -381,8 +378,8 @@ noswitch:
        // Using a tail call here cleans up tracebacks since we won't stop
        // at an intermediate systemstack.
        MOVD    0(R26), R3      // code pointer
-       MOVD.P  16(RSP), R30    // restore LR
-       SUB     $8, RSP, R29    // restore FP
+       ADD     $16, RSP
+       LDP.P   16(RSP), (R29,R30)      // restore FP, LR
        B       (R3)
 
 // func switchToCrashStack0(fn func())
@@ -1051,7 +1048,7 @@ again:
 // Smashes R0.
 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
        MOVD    $runtime·systemstack_switch(SB), R0
-       ADD     $8, R0  // get past prologue
+       ADD     $12, R0 // get past prologue
        MOVD    R0, (g_sched+gobuf_pc)(g)
        MOVD    RSP, R0
        MOVD    R0, (g_sched+gobuf_sp)(g)
@@ -1069,9 +1066,7 @@ TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
        MOVD    fn+0(FP), R1
        MOVD    arg+8(FP), R0
-       SUB     $16, RSP        // skip over saved frame pointer below RSP
        BL      (R1)
-       ADD     $16, RSP        // skip over saved frame pointer below RSP
        RET
 
 // func asmcgocall(fn, arg unsafe.Pointer) int32
@@ -1236,9 +1231,9 @@ havem:
        BL      runtime·save_g(SB)
        MOVD    (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
        MOVD    (g_sched+gobuf_pc)(g), R5
-       MOVD    R5, -48(R4)
+       MOVD    R5, -8(R4)
        MOVD    (g_sched+gobuf_bp)(g), R5
-       MOVD    R5, -56(R4)
+       MOVD    R5, -16(R4)
        // Gather our arguments into registers.
        MOVD    fn+0(FP), R1
        MOVD    frame+8(FP), R2
@@ -1252,7 +1247,7 @@ havem:
        CALL    (R0) // indirect call to bypass nosplit check. We're on a different stack now.
 
        // Restore g->sched (== m->curg->sched) from saved values.
-       MOVD    0(RSP), R5
+       MOVD    40(RSP), R5
        MOVD    R5, (g_sched+gobuf_pc)(g)
        MOVD    RSP, R4
        ADD     $48, R4, R4
@@ -1490,10 +1485,57 @@ GLOBL   debugCallFrameTooLarge<>(SB), RODATA, $20       // Size duplicated below
 //
 // This is ABIInternal because Go code injects its PC directly into new
 // goroutine stacks.
+//
+// State before debugger starts doing anything:
+// |   current   |
+// |   stack     |
+// +-------------+ <- SP = origSP
+// stopped executing at PC = origPC
+// some values are in LR (origLR) and FP (origFP)
+//
+// After debugger has done steps 1-6 above:
+// |   current   |
+// |   stack     |
+// +-------------+ <- origSP
+// |    -----    | (used to be a slot to store frame pointer on entry to origPC's frame.)
+// +-------------+
+// |   origLR    |
+// +-------------+ <- SP
+// |    -----    |
+// +-------------+
+// |   argsize   |
+// +-------------+
+// LR = origPC, PC = debugCallV2
+//
+// debugCallV2 then modifies the stack up to the "good" label:
+// |   current   |
+// |   stack     |
+// +-------------+ <- origSP
+// |    -----    | (used to be a slot to store frame pointer on entry to origPC's frame.)
+// +-------------+
+// |   origLR    |
+// +-------------+ <- where debugger left SP
+// |   origPC    |
+// +-------------+
+// |   origFP    |
+// +-------------+ <- FP = SP + 256
+// |   saved     |
+// |  registers  |
+// | (224 bytes) |
+// +-------------+ <- SP + 32
+// |  space for  |
+// |   outargs   |
+// +-------------+ <- SP + 8
+// |   argsize   |
+// +-------------+ <- SP
+
 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
-       STP     (R29, R30), -280(RSP)
-       SUB     $272, RSP, RSP
-       SUB     $8, RSP, R29
+       MOVD    R30, -8(RSP)            // save origPC
+       MOVD    -16(RSP), R30           // save argsize in R30 temporarily
+       MOVD.W  R29, -16(RSP)           // push origFP
+       MOVD    RSP, R29                // frame pointer chain now set up
+       SUB     $256, RSP, RSP          // allocate frame
+       MOVD    R30, (RSP)              // Save argsize on the stack
        // Save all registers that may contain pointers so they can be
        // conservatively scanned.
        //
@@ -1515,7 +1557,8 @@ TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
        STP     (R0, R1), (4*8)(RSP)
 
        // Perform a safe-point check.
-       MOVD    R30, 8(RSP) // Caller's PC
+       MOVD    264(RSP), R0 // origPC
+       MOVD    R0, 8(RSP)
        CALL    runtime·debugCallCheck(SB)
        MOVD    16(RSP), R0
        CBZ     R0, good
@@ -1559,7 +1602,7 @@ good:
        CALL    runtime·debugCallWrap(SB);     \
        JMP     restore
 
-       MOVD    256(RSP), R0 // the argument frame size
+       MOVD    (RSP), R0 // the argument frame size
        DEBUG_CALL_DISPATCH(debugCall32<>, 32)
        DEBUG_CALL_DISPATCH(debugCall64<>, 64)
        DEBUG_CALL_DISPATCH(debugCall128<>, 128)
@@ -1607,9 +1650,9 @@ restore:
        LDP     (6*8)(RSP), (R2, R3)
        LDP     (4*8)(RSP), (R0, R1)
 
-       LDP     -8(RSP), (R29, R27)
-       ADD     $288, RSP, RSP // Add 16 more bytes, see saveSigContext
-       MOVD    -16(RSP), R30 // restore old lr
+       MOVD    272(RSP), R30           // restore old lr (saved by (*sigctxt).pushCall)
+       LDP     256(RSP), (R29, R27)    // restore old fp, set up resumption address
+       ADD     $288, RSP, RSP          // Pop frame, LR+FP, and block pushed by (*sigctxt).pushCall
        JMP     (R27)
 
 // runtime.debugCallCheck assumes that functions defined with the
index 769c4ffc5c9eeb847a26054a8d53babd5cf5ad7f..9064cae039f00d7f213479635559b8f451f82ad7 100644 (file)
@@ -488,26 +488,18 @@ func genARM64(g *gen) {
                l.stack += 8 // SP needs 16-byte alignment
        }
 
-       // allocate frame, save PC of interrupted instruction (in LR)
-       p("MOVD R30, %d(RSP)", -l.stack)
+       // allocate frame, save PC (in R30), FP (in R29) of interrupted instruction
+       p("STP.W (R29, R30), -16(RSP)")
+       p("MOVD RSP, R29") // set up new frame pointer
        p("SUB $%d, RSP", l.stack)
-       p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
-       p("SUB $8, RSP, R29")  // set up new frame pointer
-       // On iOS, save the LR again after decrementing SP. We run the
-       // signal handler on the G stack (as it doesn't support sigaltstack),
-       // so any writes below SP may be clobbered.
-       p("#ifdef GOOS_ios")
-       p("MOVD R30, (RSP)")
-       p("#endif")
 
        l.save(g)
        p("CALL ·asyncPreempt2(SB)")
        l.restore(g)
 
-       p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
-       p("MOVD -8(RSP), R29")          // restore frame pointer
-       p("MOVD (RSP), R27")            // load PC to REGTMP
-       p("ADD $%d, RSP", l.stack+16)   // pop frame (including the space pushed by sigctxt.pushCall)
+       p("MOVD %d(RSP), R30", l.stack+16)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+       p("LDP %d(RSP), (R29, R27)", l.stack) // Restore frame pointer. Load PC into REGTMP (R27).
+       p("ADD $%d, RSP", l.stack+32)         // pop frame (including the space pushed by sigctxt.pushCall)
        p("RET (R27)")
 }
 
index 8c91c9435abd18d81a644289480e3ec2c83e48bc..04b3afe16827655ebb2959ce7697126d2144d188 100644 (file)
@@ -1379,10 +1379,10 @@ func recovery(gp *g) {
                // the caller
                gp.sched.bp = fp - 2*goarch.PtrSize
        case goarch.IsArm64 != 0:
-               // on arm64, the architectural bp points one word higher
-               // than the sp. fp is totally useless to us here, because it
-               // only gets us to the caller's fp.
-               gp.sched.bp = sp - goarch.PtrSize
+               // on arm64, the first two words of the frame are caller's PC
+               // (the saved LR register) and the caller's BP.
+               // Coincidentally, the same as amd64.
+               gp.sched.bp = fp - 2*goarch.PtrSize
        }
        gogo(&gp.sched)
 }
index 31ec9d940f76d4944b8f4d35892d5f011b201aca..f4248cac257550f629cd62feb99a07b71eff2389 100644 (file)
@@ -4,13 +4,9 @@
 #include "textflag.h"
 
 TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
-       MOVD R30, -496(RSP)
+       STP.W (R29, R30), -16(RSP)
+       MOVD RSP, R29
        SUB $496, RSP
-       MOVD R29, -8(RSP)
-       SUB $8, RSP, R29
-       #ifdef GOOS_ios
-       MOVD R30, (RSP)
-       #endif
        STP (R0, R1), 8(RSP)
        STP (R2, R3), 24(RSP)
        STP (R4, R5), 40(RSP)
@@ -78,8 +74,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
        LDP 40(RSP), (R4, R5)
        LDP 24(RSP), (R2, R3)
        LDP 8(RSP), (R0, R1)
-       MOVD 496(RSP), R30
-       MOVD -8(RSP), R29
-       MOVD (RSP), R27
-       ADD $512, RSP
+       MOVD 512(RSP), R30
+       LDP 496(RSP), (R29, R27)
+       ADD $528, RSP
        RET (R27)
index 5df650105bb4d57a52a92a3be3ddd86219040035..feaa328d4c0d8af2bf86f9b0bf10d84097276234 100644 (file)
@@ -397,7 +397,7 @@ TEXT        racecallatomic<>(SB), NOSPLIT, $0
        // R3 = addr of incoming arg list
 
        // Trigger SIGSEGV early.
-       MOVD    40(RSP), R3     // 1st arg is addr. after two times BL, get it at 40(RSP)
+       MOVD    72(RSP), R3     // 1st arg is addr. after two small frames (32 bytes each), get it at 72(RSP)
        MOVB    (R3), R13       // segv here if addr is bad
        // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
        MOVD    runtime·racearenastart(SB), R10
@@ -417,10 +417,11 @@ racecallatomic_ok:
        // Addr is within the good range, call the atomic function.
        load_g
        MOVD    g_racectx(g), R0        // goroutine context
-       MOVD    16(RSP), R1     // caller pc
+       MOVD    56(RSP), R1     // caller pc
        MOVD    R9, R2  // pc
-       ADD     $40, RSP, R3
-       JMP     racecall<>(SB)  // does not return
+       ADD     $72, RSP, R3
+       BL      racecall<>(SB)
+       RET
 racecallatomic_ignore:
        // Addr is outside the good range.
        // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
@@ -435,9 +436,9 @@ racecallatomic_ignore:
        // racecall will call LLVM race code which might clobber R28 (g)
        load_g
        MOVD    g_racectx(g), R0        // goroutine context
-       MOVD    16(RSP), R1     // caller pc
+       MOVD    56(RSP), R1     // caller pc
        MOVD    R9, R2  // pc
-       ADD     $40, RSP, R3    // arguments
+       ADD     $72, RSP, R3    // arguments
        BL      racecall<>(SB)
        // Call __tsan_go_ignore_sync_end.
        MOVD    $__tsan_go_ignore_sync_end(SB), R9
@@ -476,10 +477,6 @@ TEXT       racecall<>(SB), NOSPLIT|NOFRAME, $0-0
        MOVD    (g_sched+gobuf_sp)(R11), R12
        MOVD    R12, RSP
 call:
-       // Decrement SP past where the frame pointer is saved in the Go arm64
-       // ABI (one word below the stack pointer) so the race detector library
-       // code doesn't clobber it
-       SUB     $16, RSP
        BL      R9
        MOVD    R19, RSP
        JMP     (R20)
index af7d29f9de1d31d457d0d65b833b477eb934a1c7..61dad507219cafb9ac9b0cf82c7248b5dbec69f5 100644 (file)
@@ -8,7 +8,6 @@ package runtime
 
 import (
        "internal/abi"
-       "internal/goarch"
        "internal/runtime/sys"
        "unsafe"
 )
@@ -63,18 +62,11 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) {
        // We arrange lr, and pc to pretend the panicking
        // function calls sigpanic directly.
        // Always save LR to stack so that panics in leaf
-       // functions are correctly handled. This smashes
-       // the stack frame but we're not going back there
-       // anyway.
+       // functions are correctly handled.
+       // This extra space is known to gentraceback.
        sp := c.sp() - sys.StackAlign // needs only sizeof uint64, but must align the stack
        c.set_sp(sp)
        *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
-       // Make sure a valid frame pointer is saved on the stack so that the
-       // frame pointer checks in adjustframe are happy, if they're enabled.
-       // Frame pointer unwinding won't visit the sigpanic frame, since
-       // sigpanic will save the same frame pointer before calling into a panic
-       // function.
-       *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
 
        pc := gp.sigpc
 
@@ -96,10 +88,6 @@ func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
        sp := c.sp() - 16 // SP needs 16-byte alignment
        c.set_sp(sp)
        *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
-       // Make sure a valid frame pointer is saved on the stack so that the
-       // frame pointer checks in adjustframe are happy, if they're enabled.
-       // This is not actually used for unwinding.
-       *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
        // Set up PC and LR to pretend the function being signaled
        // calls targetPC at resumePC.
        c.set_lr(uint64(resumePC))
index 55e97e77afa9576b515901d7e237145488e03b1d..5eaceec6da14d5b4f5173039a834564b8431f9dc 100644 (file)
@@ -579,23 +579,27 @@ var ptrnames = []string{
 // |  args to callee  |
 // +------------------+ <- frame->sp
 //
-// (arm)
+// (arm64)
 // +------------------+
 // | args from caller |
 // +------------------+ <- frame->argp
-// | caller's retaddr |
+// |     <unused>     |
+// +------------------+ <- frame->fp (aka caller's sp)
+// |  return address  |
 // +------------------+
-// |  caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
+// |  caller's FP     |  (frame pointer always enabled: TODO)
 // +------------------+ <- frame->varp
 // |     locals       |
 // +------------------+
 // |  args to callee  |
 // +------------------+
-// |  return address  |
+// |     <unused>     |
 // +------------------+ <- frame->sp
 //
 // varp > sp means that the function has a frame;
 // varp == sp means frameless function.
+//
+// Alignment padding, if needed, will be between "locals" and "args to callee".
 
 type adjustinfo struct {
        old   stack
@@ -709,7 +713,8 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
        }
 
        // Adjust saved frame pointer if there is one.
-       if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize {
+       if goarch.ArchFamily == goarch.AMD64 && frame.argp-frame.varp == 2*goarch.PtrSize ||
+               goarch.ArchFamily == goarch.ARM64 && frame.argp-frame.varp == 3*goarch.PtrSize {
                if stackDebug >= 3 {
                        print("      saved bp\n")
                }
@@ -723,10 +728,7 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
                                throw("bad frame pointer")
                        }
                }
-               // On AMD64, this is the caller's frame pointer saved in the current
-               // frame.
-               // On ARM64, this is the frame pointer of the caller's caller saved
-               // by the caller in its frame (one word below its SP).
+               // This is the caller's frame pointer saved in the current frame.
                adjustpointer(adjinfo, unsafe.Pointer(frame.varp))
        }
 
index 455118a54371d79fbf0cb4abf5738ba30e9882c7..36575f765db9a596b541bd6c68f3eec027fe2f98 100644 (file)
@@ -41,6 +41,11 @@ func badLR2(arg int) {
        if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
                lrOff = 32 // FIXED_FRAME or sys.MinFrameSize
        }
+       if runtime.GOARCH == "arm64" {
+               // skip 8 bytes at bottom of parent frame, then point
+               // to the 8 bytes of the saved PC at the top of the frame.
+               lrOff = 16
+       }
        lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff))
        *lrPtr = 0xbad
 
index 8882c306edb73637ff05cecefed47cbf2db83dac..1c3e679a02bdaf964ea8e7e8c8c14eed8ef62dfe 100644 (file)
@@ -175,6 +175,11 @@ func (u *unwinder) initAt(pc0, sp0, lr0 uintptr, gp *g, flags unwindFlags) {
        // Start in the caller's frame.
        if frame.pc == 0 {
                if usesLR {
+                       // TODO: this isn't right on arm64. But also, this should
+                       // ~never happen. Calling a nil function will panic
+                       // when loading the PC out of the closure, not when
+                       // branching to that PC. (Closures should always have
+                       // valid PCs in their first word.)
                        frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp))
                        frame.lr = 0
                } else {
@@ -369,7 +374,11 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
                var lrPtr uintptr
                if usesLR {
                        if innermost && frame.sp < frame.fp || frame.lr == 0 {
-                               lrPtr = frame.sp
+                               if GOARCH == "arm64" {
+                                       lrPtr = frame.fp - goarch.PtrSize
+                               } else {
+                                       lrPtr = frame.sp
+                               }
                                frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr))
                        }
                } else {
@@ -385,24 +394,17 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
                // On x86, call instruction pushes return PC before entering new function.
                frame.varp -= goarch.PtrSize
        }
+       if GOARCH == "arm64" && frame.varp > frame.sp {
+               frame.varp -= goarch.PtrSize // LR has been saved, skip over it.
+       }
 
        // For architectures with frame pointers, if there's
        // a frame, then there's a saved frame pointer here.
        //
        // NOTE: This code is not as general as it looks.
-       // On x86, the ABI is to save the frame pointer word at the
+       // On x86 and arm64, the ABI is to save the frame pointer word at the
        // top of the stack frame, so we have to back down over it.
-       // On arm64, the frame pointer should be at the bottom of
-       // the stack (with R29 (aka FP) = RSP), in which case we would
-       // not want to do the subtraction here. But we started out without
-       // any frame pointer, and when we wanted to add it, we didn't
-       // want to break all the assembly doing direct writes to 8(RSP)
-       // to set the first parameter to a called function.
-       // So we decided to write the FP link *below* the stack pointer
-       // (with R29 = RSP - 8 in Go functions).
-       // This is technically ABI-compatible but not standard.
-       // And it happens to end up mimicking the x86 layout.
-       // Other architectures may make different decisions.
+       // No other architectures are framepointer-enabled at the moment.
        if frame.varp > frame.sp && framepointer_enabled {
                frame.varp -= goarch.PtrSize
        }
@@ -562,7 +564,7 @@ func (u *unwinder) finishInternal() {
        gp := u.g.ptr()
        if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && u.frame.sp != gp.stktopsp {
                print("runtime: g", gp.goid, ": frame.sp=", hex(u.frame.sp), " top=", hex(gp.stktopsp), "\n")
-               print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "\n")
+               print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "]\n")
                throw("traceback did not unwind completely")
        }
 }
index 4b4c93b1d067c5a4520be675f1ca9190cdec65cc..1f943fa18c3f5899ea2ee503d98c2349afb1b8dd 100644 (file)
@@ -142,7 +142,7 @@ start 136
 # (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.)
 start 96 nosplit
 start 100 nosplit; REJECT ppc64 ppc64le
-start 104 nosplit; REJECT ppc64 ppc64le arm64
+start 104 nosplit; REJECT ppc64 ppc64le
 start 108 nosplit; REJECT ppc64 ppc64le
 start 112 nosplit; REJECT ppc64 ppc64le arm64
 start 116 nosplit; REJECT ppc64 ppc64le
@@ -160,7 +160,7 @@ start 136 nosplit; REJECT
 # Because AMD64 uses frame pointer, it has 8 fewer bytes.
 start 96 nosplit call f; f 0 nosplit
 start 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
-start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64
+start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
 start 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
 start 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
 start 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
@@ -176,7 +176,7 @@ start 136 nosplit call f; f 0 nosplit; REJECT
 # Architectures differ in the same way as before.
 start 96 nosplit call f; f 0 call f
 start 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le
-start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
+start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
 start 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
 start 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
 start 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
@@ -189,7 +189,7 @@ start 136 nosplit call f; f 0 call f; REJECT
 # Indirect calls are assumed to be splitting functions.
 start 96 nosplit callind
 start 100 nosplit callind; REJECT ppc64 ppc64le
-start 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
+start 104 nosplit callind; REJECT ppc64 ppc64le amd64
 start 108 nosplit callind; REJECT ppc64 ppc64le amd64
 start 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
 start 116 nosplit callind; REJECT ppc64 ppc64le amd64