]> Cypherpunks repositories - gostls13.git/commitdiff
runtime, cmd/compile: optimize open-coded defers
authorMatthew Dempsky <mdempsky@google.com>
Fri, 4 Aug 2023 21:10:59 +0000 (14:10 -0700)
committerMatthew Dempsky <mdempsky@google.com>
Mon, 7 Aug 2023 18:05:54 +0000 (18:05 +0000)
This CL optimizes open-coded defers in two ways:

1. It modifies local variable sorting to place all open-coded defer
closure slots in order, so that rather than requiring the metadata to
contain each offset individually, we just need a single offset to the
first slot.

2. Because the slots are in ascending order and can be directly
indexed, we can get rid of the count of how many defers are in the
frame. Instead, we just find the top set bit in the active defers
bitmask, and load the corresponding closure.

Change-Id: I6f912295a492211023a9efe12c94a14f449d86ad
Reviewed-on: https://go-review.googlesource.com/c/go/+/516199
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/compile/internal/ssagen/pgen.go
src/cmd/compile/internal/ssagen/ssa.go
src/runtime/panic.go
src/runtime/runtime2.go

index fd7b2d9cf5ae3fcb9f989c3a72fd26b4cdea6855..ca064a16a7d920e185f82acfaf1aa904381d431f 100644 (file)
@@ -64,6 +64,22 @@ func cmpstackvarlt(a, b *ir.Name) bool {
                return a.Type().Alignment() > b.Type().Alignment()
        }
 
+       // Sort normal variables before open-coded-defer slots, so that the
+       // latter are grouped together and near the top of the frame (to
+       // minimize varint encoding of their varp offset).
+       if a.OpenDeferSlot() != b.OpenDeferSlot() {
+               return a.OpenDeferSlot()
+       }
+
+       // If a and b are both open-coded defer slots, then order them by
+       // index in descending order, so they'll be laid out in the frame in
+       // ascending order.
+       //
+       // Their index was saved in FrameOffset in state.openDeferSave.
+       if a.OpenDeferSlot() {
+               return a.FrameOffset() > b.FrameOffset()
+       }
+
        // Tie breaker for stable results.
        return a.Sym().Name < b.Sym().Name
 }
index bb67bd6bad72a4f78a45e175cbdf34840c92c12b..03b9e568697ce043b95bda2ee197bd983d16be5f 100644 (file)
@@ -282,22 +282,26 @@ func dvarint(x *obj.LSym, off int, v int64) int {
 // top of the local variables) for their starting address. The format is:
 //
 //   - Offset of the deferBits variable
-//   - Number of defers in the function
-//   - Information about each defer call, in reverse order of appearance in the function:
-//   - Offset of the closure value to call
+//   - Offset of the first closure slot (the rest are laid out consecutively).
 func (s *state) emitOpenDeferInfo() {
+       firstOffset := s.openDefers[0].closureNode.FrameOffset()
+
+       // Verify that cmpstackvarlt laid out the slots in order.
+       for i, r := range s.openDefers {
+               have := r.closureNode.FrameOffset()
+               want := firstOffset + int64(i)*int64(types.PtrSize)
+               if have != want {
+                       base.FatalfAt(s.curfn.Pos(), "unexpected frame offset for open-coded defer slot #%v: have %v, want %v", i, have, want)
+               }
+       }
+
        x := base.Ctxt.Lookup(s.curfn.LSym.Name + ".opendefer")
        x.Set(obj.AttrContentAddressable, true)
        s.curfn.LSym.Func().OpenCodedDeferInfo = x
+
        off := 0
        off = dvarint(x, off, -s.deferBitsTemp.FrameOffset())
-       off = dvarint(x, off, int64(len(s.openDefers)))
-
-       // Write in reverse-order, for ease of running in that order at runtime
-       for i := len(s.openDefers) - 1; i >= 0; i-- {
-               r := s.openDefers[i]
-               off = dvarint(x, off, -r.closureNode.FrameOffset())
-       }
+       off = dvarint(x, off, -firstOffset)
 }
 
 func okOffset(offset int64) int64 {
@@ -567,7 +571,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func {
        // Main call to ssa package to compile function
        ssa.Compile(s.f)
 
-       if s.hasOpenDefers {
+       if len(s.openDefers) != 0 {
                s.emitOpenDeferInfo()
        }
 
@@ -5053,6 +5057,7 @@ func (s *state) openDeferSave(t *types.Type, val *ssa.Value) *ssa.Value {
        pos := val.Pos
        temp := typecheck.TempAt(pos.WithNotStmt(), s.curfn, t)
        temp.SetOpenDeferSlot(true)
+       temp.SetFrameOffset(int64(len(s.openDefers))) // so cmpstackvarlt can order them
        var addrTemp *ssa.Value
        // Use OpVarLive to make sure stack slot for the closure is not removed by
        // dead-store elimination
index 1ed40c71a41aa4b4bda868b40088748cf64e087c..e7483b80b6ad9a49e40f08c6ddafd50bd3dc215e 100644 (file)
@@ -682,24 +682,21 @@ func (p *_panic) nextDefer() (func(), bool) {
        p.argp = add(p.startSP, sys.MinFrameSize)
 
        for {
-               for p.openDefers > 0 {
-                       p.openDefers--
-
-                       // Find the closure offset for the next deferred call.
-                       var closureOffset uint32
-                       closureOffset, p.closureOffsets = readvarintUnsafe(p.closureOffsets)
-
-                       bit := uint8(1 << p.openDefers)
-                       if *p.deferBitsPtr&bit == 0 {
-                               continue
+               for p.deferBitsPtr != nil {
+                       bits := *p.deferBitsPtr
+                       if bits == 0 {
+                               p.deferBitsPtr = nil
+                               break
                        }
-                       *p.deferBitsPtr &^= bit
 
-                       if *p.deferBitsPtr == 0 {
-                               p.openDefers = 0 // short circuit: no more active defers
-                       }
+                       // Find index of top bit set.
+                       i := 7 - uintptr(sys.LeadingZeros8(bits))
+
+                       // Clear bit and store it back.
+                       bits &^= 1 << i
+                       *p.deferBitsPtr = bits
 
-                       return *(*func())(add(p.varp, -uintptr(closureOffset))), true
+                       return *(*func())(add(p.slotsPtr, i*goarch.PtrSize)), true
                }
 
                if d := gp._defer; d != nil && d.sp == uintptr(p.sp) {
@@ -752,25 +749,8 @@ func (p *_panic) nextFrame() (ok bool) {
                        // then we can simply loop until we find the next frame where
                        // it's non-zero.
 
-                       if fd := funcdata(u.frame.fn, abi.FUNCDATA_OpenCodedDeferInfo); fd != nil {
-                               if u.frame.fn.deferreturn == 0 {
-                                       throw("missing deferreturn")
-                               }
-                               p.retpc = u.frame.fn.entry() + uintptr(u.frame.fn.deferreturn)
-
-                               var deferBitsOffset uint32
-                               deferBitsOffset, fd = readvarintUnsafe(fd)
-                               deferBitsPtr := (*uint8)(add(unsafe.Pointer(u.frame.varp), -uintptr(deferBitsOffset)))
-
-                               if *deferBitsPtr != 0 {
-                                       var openDefers uint32
-                                       openDefers, fd = readvarintUnsafe(fd)
-
-                                       p.openDefers = uint8(openDefers)
-                                       p.deferBitsPtr = deferBitsPtr
-                                       p.closureOffsets = fd
-                                       break // found a frame with open-coded defers
-                               }
+                       if p.initOpenCodedDefers(u.frame.fn, unsafe.Pointer(u.frame.varp)) {
+                               break // found a frame with open-coded defers
                        }
 
                        if u.frame.sp == limit {
@@ -787,7 +767,6 @@ func (p *_panic) nextFrame() (ok bool) {
                }
                p.sp = unsafe.Pointer(u.frame.sp)
                p.fp = unsafe.Pointer(u.frame.fp)
-               p.varp = unsafe.Pointer(u.frame.varp)
 
                ok = true
        })
@@ -795,6 +774,31 @@ func (p *_panic) nextFrame() (ok bool) {
        return
 }
 
+func (p *_panic) initOpenCodedDefers(fn funcInfo, varp unsafe.Pointer) bool {
+       fd := funcdata(fn, abi.FUNCDATA_OpenCodedDeferInfo)
+       if fd == nil {
+               return false
+       }
+
+       if fn.deferreturn == 0 {
+               throw("missing deferreturn")
+       }
+
+       deferBitsOffset, fd := readvarintUnsafe(fd)
+       deferBitsPtr := (*uint8)(add(varp, -uintptr(deferBitsOffset)))
+       if *deferBitsPtr == 0 {
+               return false // has open-coded defers, but none pending
+       }
+
+       slotsOffset, fd := readvarintUnsafe(fd)
+
+       p.retpc = fn.entry() + uintptr(fn.deferreturn)
+       p.deferBitsPtr = deferBitsPtr
+       p.slotsPtr = add(varp, -uintptr(slotsOffset))
+
+       return true
+}
+
 // The implementation of the predeclared function recover.
 // Cannot split the stack because it needs to reliably
 // find the stack segment of its caller.
index b9547651eefd5408ac05ca82e809d58ed37a1f49..cdd8c3db7fe1ac85d427dc29f74c15707195c368 100644 (file)
@@ -1024,19 +1024,17 @@ type _panic struct {
        startSP unsafe.Pointer
 
        // The current stack frame that we're running deferred calls for.
-       sp   unsafe.Pointer
-       lr   uintptr
-       fp   unsafe.Pointer
-       varp unsafe.Pointer
+       sp unsafe.Pointer
+       lr uintptr
+       fp unsafe.Pointer
 
        // retpc stores the PC where the panic should jump back to, if the
        // function last returned by _panic.next() recovers the panic.
        retpc uintptr
 
        // Extra state for handling open-coded defers.
-       deferBitsPtr   *uint8
-       closureOffsets unsafe.Pointer
-       openDefers     uint8 // count of pending open-coded defers
+       deferBitsPtr *uint8
+       slotsPtr     unsafe.Pointer
 
        recovered   bool // whether this panic has been recovered
        goexit      bool