]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/x86: use push/pop instead of mov to store/load FP
authorqmuntal <quimmuntal@gmail.com>
Thu, 19 Jan 2023 15:17:56 +0000 (16:17 +0100)
committerQuim Muntal <quimmuntal@gmail.com>
Tue, 31 Jan 2023 07:55:26 +0000 (07:55 +0000)
This CL changes how the x86 compiler stores and loads the frame pointer
on each function prologue and epilogue, with the goal to reduce the
final binary size without affecting performance.

The compiler is currently using MOV instructions to load and store BP,
which can take from 5 to 8 bytes each.

This CL changes this approach so it emits PUSH/POP instructions instead,
which always take only 1 byte each (when operating with BP). It can also
avoid using the SUBQ/ADDQ to grow the stack for functions that have
frame pointer but does not have local variables.

On Windows, this CL reduces the go toolchain size from 15,697,920 bytes
to 15,584,768 bytes, a reduction of 0.7%.

Example of epilog and prologue for a function with 0x10 bytes of
local variables:

Before

===
 SUBQ    $0x18, SP
 MOVQ    BP, 0x10(SP)
 LEAQ    0x10(SP), BP

 ... function body ...

 MOVQ    0x10(SP), BP
 ADDQ    $0x18, SP
 RET
===

After

===
  PUSHQ   BP
  LEAQ    0(SP), BP
  SUBQ    $0x10, SP

  ... function body ...

  MOVQ    ADDQ $0x10, SP
  POPQ    BP
  RET
===

Updates #6853

Change-Id: Ice9e14bbf8dff083c5f69feb97e9a764c3ca7785
Reviewed-on: https://go-review.googlesource.com/c/go/+/462300
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>

src/cmd/internal/obj/x86/obj6.go

index aa4cc225c6ad484bdc9c3453b762b2385836e9f8..33c02d59e822ceae30f7e8db5b34aabe5f21c7ce 100644 (file)
@@ -684,37 +684,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                p, regg = loadG(ctxt, cursym, p, newprog)
        }
 
-       // Delve debugger would like the next instruction to be noted as the end of the function prologue.
-       // TODO: are there other cases (e.g., wrapper functions) that need marking?
-       markedPrologue := false
-
-       if autoffset != 0 {
-               if autoffset%int32(ctxt.Arch.RegSize) != 0 {
-                       ctxt.Diag("unaligned stack size %d", autoffset)
-               }
-               p = obj.Appendp(p, newprog)
-               p.As = AADJSP
-               p.From.Type = obj.TYPE_CONST
-               p.From.Offset = int64(autoffset)
-               p.Spadj = autoffset
-               p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
-               markedPrologue = true
-       }
-
        if bpsize > 0 {
                // Save caller's BP
                p = obj.Appendp(p, newprog)
 
-               p.As = AMOVQ
+               p.As = APUSHQ
                p.From.Type = obj.TYPE_REG
                p.From.Reg = REG_BP
-               p.To.Type = obj.TYPE_MEM
-               p.To.Reg = REG_SP
-               p.To.Scale = 1
-               p.To.Offset = int64(autoffset) - int64(bpsize)
-               if !markedPrologue {
-                       p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
-               }
 
                // Move current frame to BP
                p = obj.Appendp(p, newprog)
@@ -723,11 +699,32 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                p.From.Type = obj.TYPE_MEM
                p.From.Reg = REG_SP
                p.From.Scale = 1
-               p.From.Offset = int64(autoffset) - int64(bpsize)
+               p.From.Offset = 0
                p.To.Type = obj.TYPE_REG
                p.To.Reg = REG_BP
        }
 
+       if autoffset%int32(ctxt.Arch.RegSize) != 0 {
+               ctxt.Diag("unaligned stack size %d", autoffset)
+       }
+
+       // localoffset is autoffset discounting the frame pointer,
+       // which has already been allocated in the stack.
+       localoffset := autoffset - int32(bpsize)
+       if localoffset != 0 {
+               p = obj.Appendp(p, newprog)
+               p.As = AADJSP
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = int64(localoffset)
+               p.Spadj = localoffset
+       }
+
+       // Delve debugger would like the next instruction to be noted as the end of the function prologue.
+       // TODO: are there other cases (e.g., wrapper functions) that need marking?
+       if autoffset != 0 {
+               p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
+       }
+
        if cursym.Func().Text.From.Sym.Wrapper() {
                // if g._panic != nil && g._panic.argp == FP {
                //   g._panic.argp = bottom-of-frame
@@ -933,24 +930,23 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                if autoffset != 0 {
                        to := p.To // Keep To attached to RET for retjmp below
                        p.To = obj.Addr{}
+                       if localoffset != 0 {
+                               p.As = AADJSP
+                               p.From.Type = obj.TYPE_CONST
+                               p.From.Offset = int64(-localoffset)
+                               p.Spadj = -localoffset
+                               p = obj.Appendp(p, newprog)
+                       }
+
                        if bpsize > 0 {
                                // Restore caller's BP
-                               p.As = AMOVQ
-
-                               p.From.Type = obj.TYPE_MEM
-                               p.From.Reg = REG_SP
-                               p.From.Scale = 1
-                               p.From.Offset = int64(autoffset) - int64(bpsize)
+                               p.As = APOPQ
                                p.To.Type = obj.TYPE_REG
                                p.To.Reg = REG_BP
+                               p.Spadj = -int32(bpsize)
                                p = obj.Appendp(p, newprog)
                        }
 
-                       p.As = AADJSP
-                       p.From.Type = obj.TYPE_CONST
-                       p.From.Offset = int64(-autoffset)
-                       p.Spadj = -autoffset
-                       p = obj.Appendp(p, newprog)
                        p.As = obj.ARET
                        p.To = to