From bb5974e0cb507487ab7a164c08452456840d3ad7 Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Fri, 4 Aug 2023 14:10:59 -0700 Subject: [PATCH] runtime, cmd/compile: optimize open-coded defers This CL optimizes open-coded defers in two ways: 1. It modifies local variable sorting to place all open-coded defer closure slots in order, so that rather than requiring the metadata to contain each offset individually, we just need a single offset to the first slot. 2. Because the slots are in ascending order and can be directly indexed, we can get rid of the count of how many defers are in the frame. Instead, we just find the top set bit in the active defers bitmask, and load the corresponding closure. Change-Id: I6f912295a492211023a9efe12c94a14f449d86ad Reviewed-on: https://go-review.googlesource.com/c/go/+/516199 Reviewed-by: Keith Randall Reviewed-by: Keith Randall Run-TryBot: Matthew Dempsky TryBot-Result: Gopher Robot --- src/cmd/compile/internal/ssagen/pgen.go | 16 ++++++ src/cmd/compile/internal/ssagen/ssa.go | 27 +++++---- src/runtime/panic.go | 74 +++++++++++++------------ src/runtime/runtime2.go | 12 ++-- 4 files changed, 76 insertions(+), 53 deletions(-) diff --git a/src/cmd/compile/internal/ssagen/pgen.go b/src/cmd/compile/internal/ssagen/pgen.go index fd7b2d9cf5..ca064a16a7 100644 --- a/src/cmd/compile/internal/ssagen/pgen.go +++ b/src/cmd/compile/internal/ssagen/pgen.go @@ -64,6 +64,22 @@ func cmpstackvarlt(a, b *ir.Name) bool { return a.Type().Alignment() > b.Type().Alignment() } + // Sort normal variables before open-coded-defer slots, so that the + // latter are grouped together and near the top of the frame (to + // minimize varint encoding of their varp offset). + if a.OpenDeferSlot() != b.OpenDeferSlot() { + return a.OpenDeferSlot() + } + + // If a and b are both open-coded defer slots, then order them by + // index in descending order, so they'll be laid out in the frame in + // ascending order. + // + // Their index was saved in FrameOffset in state.openDeferSave. + if a.OpenDeferSlot() { + return a.FrameOffset() > b.FrameOffset() + } + // Tie breaker for stable results. return a.Sym().Name < b.Sym().Name } diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index bb67bd6bad..03b9e56869 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -282,22 +282,26 @@ func dvarint(x *obj.LSym, off int, v int64) int { // top of the local variables) for their starting address. The format is: // // - Offset of the deferBits variable -// - Number of defers in the function -// - Information about each defer call, in reverse order of appearance in the function: -// - Offset of the closure value to call +// - Offset of the first closure slot (the rest are laid out consecutively). func (s *state) emitOpenDeferInfo() { + firstOffset := s.openDefers[0].closureNode.FrameOffset() + + // Verify that cmpstackvarlt laid out the slots in order. + for i, r := range s.openDefers { + have := r.closureNode.FrameOffset() + want := firstOffset + int64(i)*int64(types.PtrSize) + if have != want { + base.FatalfAt(s.curfn.Pos(), "unexpected frame offset for open-coded defer slot #%v: have %v, want %v", i, have, want) + } + } + x := base.Ctxt.Lookup(s.curfn.LSym.Name + ".opendefer") x.Set(obj.AttrContentAddressable, true) s.curfn.LSym.Func().OpenCodedDeferInfo = x + off := 0 off = dvarint(x, off, -s.deferBitsTemp.FrameOffset()) - off = dvarint(x, off, int64(len(s.openDefers))) - - // Write in reverse-order, for ease of running in that order at runtime - for i := len(s.openDefers) - 1; i >= 0; i-- { - r := s.openDefers[i] - off = dvarint(x, off, -r.closureNode.FrameOffset()) - } + off = dvarint(x, off, -firstOffset) } func okOffset(offset int64) int64 { @@ -567,7 +571,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func { // Main call to ssa package to compile function ssa.Compile(s.f) - if s.hasOpenDefers { + if len(s.openDefers) != 0 { s.emitOpenDeferInfo() } @@ -5053,6 +5057,7 @@ func (s *state) openDeferSave(t *types.Type, val *ssa.Value) *ssa.Value { pos := val.Pos temp := typecheck.TempAt(pos.WithNotStmt(), s.curfn, t) temp.SetOpenDeferSlot(true) + temp.SetFrameOffset(int64(len(s.openDefers))) // so cmpstackvarlt can order them var addrTemp *ssa.Value // Use OpVarLive to make sure stack slot for the closure is not removed by // dead-store elimination diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 1ed40c71a4..e7483b80b6 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -682,24 +682,21 @@ func (p *_panic) nextDefer() (func(), bool) { p.argp = add(p.startSP, sys.MinFrameSize) for { - for p.openDefers > 0 { - p.openDefers-- - - // Find the closure offset for the next deferred call. - var closureOffset uint32 - closureOffset, p.closureOffsets = readvarintUnsafe(p.closureOffsets) - - bit := uint8(1 << p.openDefers) - if *p.deferBitsPtr&bit == 0 { - continue + for p.deferBitsPtr != nil { + bits := *p.deferBitsPtr + if bits == 0 { + p.deferBitsPtr = nil + break } - *p.deferBitsPtr &^= bit - if *p.deferBitsPtr == 0 { - p.openDefers = 0 // short circuit: no more active defers - } + // Find index of top bit set. + i := 7 - uintptr(sys.LeadingZeros8(bits)) + + // Clear bit and store it back. + bits &^= 1 << i + *p.deferBitsPtr = bits - return *(*func())(add(p.varp, -uintptr(closureOffset))), true + return *(*func())(add(p.slotsPtr, i*goarch.PtrSize)), true } if d := gp._defer; d != nil && d.sp == uintptr(p.sp) { @@ -752,25 +749,8 @@ func (p *_panic) nextFrame() (ok bool) { // then we can simply loop until we find the next frame where // it's non-zero. - if fd := funcdata(u.frame.fn, abi.FUNCDATA_OpenCodedDeferInfo); fd != nil { - if u.frame.fn.deferreturn == 0 { - throw("missing deferreturn") - } - p.retpc = u.frame.fn.entry() + uintptr(u.frame.fn.deferreturn) - - var deferBitsOffset uint32 - deferBitsOffset, fd = readvarintUnsafe(fd) - deferBitsPtr := (*uint8)(add(unsafe.Pointer(u.frame.varp), -uintptr(deferBitsOffset))) - - if *deferBitsPtr != 0 { - var openDefers uint32 - openDefers, fd = readvarintUnsafe(fd) - - p.openDefers = uint8(openDefers) - p.deferBitsPtr = deferBitsPtr - p.closureOffsets = fd - break // found a frame with open-coded defers - } + if p.initOpenCodedDefers(u.frame.fn, unsafe.Pointer(u.frame.varp)) { + break // found a frame with open-coded defers } if u.frame.sp == limit { @@ -787,7 +767,6 @@ func (p *_panic) nextFrame() (ok bool) { } p.sp = unsafe.Pointer(u.frame.sp) p.fp = unsafe.Pointer(u.frame.fp) - p.varp = unsafe.Pointer(u.frame.varp) ok = true }) @@ -795,6 +774,31 @@ func (p *_panic) nextFrame() (ok bool) { return } +func (p *_panic) initOpenCodedDefers(fn funcInfo, varp unsafe.Pointer) bool { + fd := funcdata(fn, abi.FUNCDATA_OpenCodedDeferInfo) + if fd == nil { + return false + } + + if fn.deferreturn == 0 { + throw("missing deferreturn") + } + + deferBitsOffset, fd := readvarintUnsafe(fd) + deferBitsPtr := (*uint8)(add(varp, -uintptr(deferBitsOffset))) + if *deferBitsPtr == 0 { + return false // has open-coded defers, but none pending + } + + slotsOffset, fd := readvarintUnsafe(fd) + + p.retpc = fn.entry() + uintptr(fn.deferreturn) + p.deferBitsPtr = deferBitsPtr + p.slotsPtr = add(varp, -uintptr(slotsOffset)) + + return true +} + // The implementation of the predeclared function recover. // Cannot split the stack because it needs to reliably // find the stack segment of its caller. diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index b9547651ee..cdd8c3db7f 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1024,19 +1024,17 @@ type _panic struct { startSP unsafe.Pointer // The current stack frame that we're running deferred calls for. - sp unsafe.Pointer - lr uintptr - fp unsafe.Pointer - varp unsafe.Pointer + sp unsafe.Pointer + lr uintptr + fp unsafe.Pointer // retpc stores the PC where the panic should jump back to, if the // function last returned by _panic.next() recovers the panic. retpc uintptr // Extra state for handling open-coded defers. - deferBitsPtr *uint8 - closureOffsets unsafe.Pointer - openDefers uint8 // count of pending open-coded defers + deferBitsPtr *uint8 + slotsPtr unsafe.Pointer recovered bool // whether this panic has been recovered goexit bool -- 2.50.0