From a06b08e7d1837df636d24f945fe9b3b2a369b450 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 2 Apr 2021 17:20:15 -0400 Subject: [PATCH] cmd/internal/obj/ppc64: simplify huge frame prologue CL 307010 for ppc64. I spent a long time trying to figure out how to use the carry bit from ADDCCC to further simplify this (like what we do on arm64), but gave up after I couldn't figure out how to access the carry bit without just adding more instructions. Change-Id: I6cad51b93616865b203cb16554f16121375aabbc Reviewed-on: https://go-review.googlesource.com/c/go/+/307149 Trust: Austin Clements Run-TryBot: Austin Clements Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/ppc64/obj9.go | 103 ++++++++++++----------------- 1 file changed, 44 insertions(+), 59 deletions(-) diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index a77be29cf0..c2722b0afb 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -1081,80 +1081,65 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REGSP - } else if framesize <= objabi.StackBig { + } else { // large stack: SP-framesize < stackguard-StackSmall - // ADD $-(framesize-StackSmall), SP, R4 - // CMP stackguard, R4 - p = obj.Appendp(p, c.newprog) - - p.As = AADD - p.From.Type = obj.TYPE_CONST - p.From.Offset = -(int64(framesize) - objabi.StackSmall) - p.Reg = REGSP - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 + offset := int64(framesize) - objabi.StackSmall + if framesize > objabi.StackBig { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // CMPU SP, $(framesize-StackSmall) + // BLT label-of-call-to-morestack + if offset <= 0xffff { + p = obj.Appendp(p, c.newprog) + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REGSP + p.To.Type = obj.TYPE_CONST + p.To.Offset = offset + } else { + // Constant is too big for CMPU. + p = obj.Appendp(p, c.newprog) + p.As = AMOVD + p.From.Type = obj.TYPE_CONST + p.From.Offset = offset + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 - p = obj.Appendp(p, c.newprog) - p.As = ACMPU - p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 - } else { - // Such a large stack we need to protect against wraparound. - // If SP is close to zero: - // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) - // The +StackGuard on both sides is required to keep the left side positive: - // SP is allowed to be slightly below stackguard. See stack.h. - // - // Preemption sets stackguard to StackPreempt, a very large value. - // That breaks the math above, so we have to check for that explicitly. - // // stackguard is R3 - // CMP R3, $StackPreempt - // BEQ label-of-call-to-morestack - // ADD $StackGuard, SP, R4 - // SUB R3, R4 - // MOVD $(framesize+(StackGuard-StackSmall)), R31 - // CMPU R31, R4 - p = obj.Appendp(p, c.newprog) + p = obj.Appendp(p, c.newprog) + p.As = ACMPU + p.From.Type = obj.TYPE_REG + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_R4 + } - p.As = ACMP - p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 - p.To.Type = obj.TYPE_CONST - p.To.Offset = objabi.StackPreempt + p = obj.Appendp(p, c.newprog) + q = p + p.As = ABLT + p.To.Type = obj.TYPE_BRANCH + } + // Check against the stack guard. We've ensured this won't underflow. + // ADD $-(framesize-StackSmall), SP, R4 + // CMPU stackguard, R4 p = obj.Appendp(p, c.newprog) - q = p - p.As = ABEQ - p.To.Type = obj.TYPE_BRANCH - p = obj.Appendp(p, c.newprog) p.As = AADD p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(objabi.StackGuard) + p.From.Offset = -offset p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 - p = obj.Appendp(p, c.newprog) - p.As = ASUB - p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 - - p = obj.Appendp(p, c.newprog) - p.As = AMOVD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall - p.To.Type = obj.TYPE_REG - p.To.Reg = REGTMP - p = obj.Appendp(p, c.newprog) p.As = ACMPU p.From.Type = obj.TYPE_REG - p.From.Reg = REGTMP + p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 } -- 2.48.1