Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/ppc64: simplify huge frame prologue
author Austin Clements <austin@google.com>
Fri, 2 Apr 2021 21:20:15 +0000 (17:20 -0400)
committer Austin Clements <austin@google.com>
Mon, 5 Apr 2021 16:22:16 +0000 (16:22 +0000)
CL 307010 for ppc64.

I spent a long time trying to figure out how to use the carry bit from
ADDCCC to further simplify this (like what we do on arm64), but gave
up after I couldn't figure out how to access the carry bit without
just adding more instructions.

Change-Id: I6cad51b93616865b203cb16554f16121375aabbc
Reviewed-on: https://go-review.googlesource.com/c/go/+/307149
Trust: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/internal/obj/ppc64/obj9.go

index a77be29cf09c70373c6a09d749ba18e0551c779c..c2722b0afb05de401ecb5fc1ba2af29ac69c8465 100644 (file)
@@ -1081,80 +1081,65 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
                p.From.Reg = REG_R3
                p.To.Type = obj.TYPE_REG
                p.To.Reg = REGSP
-       } else if framesize <= objabi.StackBig {
+       } else {
                // large stack: SP-framesize < stackguard-StackSmall
-               //      ADD $-(framesize-StackSmall), SP, R4
-               //      CMP stackguard, R4
-               p = obj.Appendp(p, c.newprog)
-
-               p.As = AADD
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = -(int64(framesize) - objabi.StackSmall)
-               p.Reg = REGSP
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R4
+               offset := int64(framesize) - objabi.StackSmall
+               if framesize > objabi.StackBig {
+                       // Such a large stack we need to protect against underflow.
+                       // The runtime guarantees SP > objabi.StackBig, but
+                       // framesize is large enough that SP-framesize may
+                       // underflow, causing a direct comparison with the
+                       // stack guard to incorrectly succeed. We explicitly
+                       // guard against underflow.
+                       //
+                       //      CMPU    SP, $(framesize-StackSmall)
+                       //      BLT     label-of-call-to-morestack
+                       if offset <= 0xffff {
+                               p = obj.Appendp(p, c.newprog)
+                               p.As = ACMPU
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGSP
+                               p.To.Type = obj.TYPE_CONST
+                               p.To.Offset = offset
+                       } else {
+                               // Constant is too big for CMPU.
+                               p = obj.Appendp(p, c.newprog)
+                               p.As = AMOVD
+                               p.From.Type = obj.TYPE_CONST
+                               p.From.Offset = offset
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = REG_R4
 
-               p = obj.Appendp(p, c.newprog)
-               p.As = ACMPU
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R3
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R4
-       } else {
-               // Such a large stack we need to protect against wraparound.
-               // If SP is close to zero:
-               //      SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
-               // The +StackGuard on both sides is required to keep the left side positive:
-               // SP is allowed to be slightly below stackguard. See stack.h.
-               //
-               // Preemption sets stackguard to StackPreempt, a very large value.
-               // That breaks the math above, so we have to check for that explicitly.
-               //      // stackguard is R3
-               //      CMP     R3, $StackPreempt
-               //      BEQ     label-of-call-to-morestack
-               //      ADD     $StackGuard, SP, R4
-               //      SUB     R3, R4
-               //      MOVD    $(framesize+(StackGuard-StackSmall)), R31
-               //      CMPU    R31, R4
-               p = obj.Appendp(p, c.newprog)
+                               p = obj.Appendp(p, c.newprog)
+                               p.As = ACMPU
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGSP
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = REG_R4
+                       }
 
-               p.As = ACMP
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R3
-               p.To.Type = obj.TYPE_CONST
-               p.To.Offset = objabi.StackPreempt
+                       p = obj.Appendp(p, c.newprog)
+                       q = p
+                       p.As = ABLT
+                       p.To.Type = obj.TYPE_BRANCH
+               }
 
+               // Check against the stack guard. We've ensured this won't underflow.
+               //      ADD  $-(framesize-StackSmall), SP, R4
+               //      CMPU stackguard, R4
                p = obj.Appendp(p, c.newprog)
-               q = p
-               p.As = ABEQ
-               p.To.Type = obj.TYPE_BRANCH
 
-               p = obj.Appendp(p, c.newprog)
                p.As = AADD
                p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(objabi.StackGuard)
+               p.From.Offset = -offset
                p.Reg = REGSP
                p.To.Type = obj.TYPE_REG
                p.To.Reg = REG_R4
 
-               p = obj.Appendp(p, c.newprog)
-               p.As = ASUB
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = REG_R3
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REG_R4
-
-               p = obj.Appendp(p, c.newprog)
-               p.As = AMOVD
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = REGTMP
-
                p = obj.Appendp(p, c.newprog)
                p.As = ACMPU
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = REGTMP
+               p.From.Reg = REG_R3
                p.To.Type = obj.TYPE_REG
                p.To.Reg = REG_R4
        }