From 9b331189c15c4a96651c3d6842d5bd8ee5b5f462 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 25 Jan 2018 11:35:27 -0500 Subject: [PATCH] cmd/internal/obj/x86: adjust SP correctly for tail calls Currently, tail calls on x86 don't adjust the SP on return, so it's important that the compiler produce a zero-sized frame and disable the frame pointer. However, these constraints aren't necessary. For example, on other architectures it's generally necessary to restore the saved LR before a tail call, so obj simply makes this work. Likewise, on x86, there's no reason we can't simply make this work. Hence, this CL adjusts the compiler to use the same tail call convention for x86 that we use on LR machines by producing a RET with a target, rather than a JMP with a target. In fact, obj already understands this convention for x86 except that it's buggy with non-zero frame sizes. So we also fix this bug obj. As a result of these fixes, the compiler no longer needs to mark wrappers as NoFramePointer since it's now perfectly fine to save the frame pointer. In fact, this eliminates the only use of NoFramePointer in the compiler, which will enable further cleanups. This also fixes what is very nearly, but not quite, a code generation bug. NoFramePointer becomes obj.NOFRAME in the object file, which on ppc64 and s390x means to omit the saved LR. Hence, on these architectures, NoFramePointer (and NOFRAME) is only safe to set on leaf functions. However, on *most* architectures, wrappers aren't necessarily leaf functions because they may call DUFFZERO. We're saved on ppc64 and s390x only because the compiler doesn't have the rules to produce DUFFZERO calls on these architectures. Hence, this only works because the set of LR architectures that implement NOFRAME is disjoint from the set where the compiler produces DUFFZERO operations. (I discovered this whole mess when I attempted to add NOFRAME support to arm.) Change-Id: Icc589aeb86beacb850d0a6a80bd3024974a33947 Reviewed-on: https://go-review.googlesource.com/92035 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/amd64/ssa.go | 2 +- src/cmd/compile/internal/gc/subr.go | 2 -- src/cmd/internal/obj/x86/obj6.go | 3 +++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index ce322e5e99..e3129edbf1 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -1076,7 +1076,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { case ssa.BlockRet: s.Prog(obj.ARET) case ssa.BlockRetJmp: - p := s.Prog(obj.AJMP) + p := s.Prog(obj.ARET) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = b.Aux.(*obj.LSym) diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go index e0925dc7f2..5143c3e3d5 100644 --- a/src/cmd/compile/internal/gc/subr.go +++ b/src/cmd/compile/internal/gc/subr.go @@ -1759,8 +1759,6 @@ func genwrapper(rcvr *types.Type, method *types.Field, newnam *types.Sym, iface as.Right.Type = rcvr fn.Nbody.Append(as) fn.Nbody.Append(nodSym(ORETJMP, nil, methodsym(method.Sym, methodrcvr, false))) - // When tail-calling, we can't use a frame pointer. - fn.Func.SetNoFramePointer(true) } else { fn.Func.SetWrapper(true) // ignore frame for panic+recover matching call := nod(OCALL, dot, nil) diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 7b5e4769dc..c31d458d4a 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -917,6 +917,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } if autoffset != 0 { + to := p.To // Keep To attached to RET for retjmp below + p.To = obj.Addr{} if bpsize > 0 { // Restore caller's BP p.As = AMOVQ @@ -936,6 +938,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.Spadj = -autoffset p = obj.Appendp(p, newprog) p.As = obj.ARET + p.To = to // If there are instructions following // this ARET, they come from a branch -- 2.48.1