From: Russ Cox Date: Mon, 11 May 2015 00:22:32 +0000 (-0400) Subject: runtime: rewrite addb/subtractb to be simpler to compile; introduce add1, subtract1 X-Git-Tag: go1.5beta1~597 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=94934f843ee8b0a4a09dc336d4e2b57601b34206;p=gostls13.git runtime: rewrite addb/subtractb to be simpler to compile; introduce add1, subtract1 This reduces the depth of the inlining at a particular call site. The inliner introduces many temporary variables, and the compiler can do a better job with fewer. Being verbose in the bodies of these helper functions seems like a reasonable tradeoff: the uses are still just as readable, and they run faster in some important cases. Change-Id: I5323976ed3704d0acd18fb31176cfbf5ba23a89c Reviewed-on: https://go-review.googlesource.com/9883 Reviewed-by: Rick Hudson Reviewed-by: Austin Clements --- diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 56e773ad5e..db43e482d2 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -83,13 +83,37 @@ const ( // addb returns the byte pointer p+n. //go:nowritebarrier func addb(p *byte, n uintptr) *byte { - return (*byte)(add(unsafe.Pointer(p), n)) + // Note: wrote out full expression instead of calling add(p, n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) } // subtractb returns the byte pointer p-n. //go:nowritebarrier func subtractb(p *byte, n uintptr) *byte { - return (*byte)(add(unsafe.Pointer(p), -n)) + // Note: wrote out full expression instead of calling add(p, -n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n)) +} + +// add1 returns the byte pointer p+1. +//go:nowritebarrier +func add1(p *byte) *byte { + // Note: wrote out full expression instead of calling addb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) +} + +// subtract1 returns the byte pointer p-1. +//go:nowritebarrier +func subtract1(p *byte) *byte { + // Note: wrote out full expression instead of calling subtractb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) } // mHeap_MapBits is called each time arena_used is extended. @@ -220,7 +244,7 @@ func (h heapBits) next() heapBits { if h.shift < 3*heapBitsShift { return heapBits{h.bitp, h.shift + heapBitsShift} } - return heapBits{subtractb(h.bitp, 1), 0} + return heapBits{subtract1(h.bitp), 0} } // forward returns the heapBits describing n pointer-sized words ahead of h in memory. @@ -291,7 +315,7 @@ func (h heapBits) hasPointers(size uintptr) bool { if h.shift == 0 { return b&(bitMarked<<(2*heapBitsShift)) != 0 } - return uint32(*subtractb(h.bitp, 1))&bitMarked != 0 + return uint32(*subtract1(h.bitp))&bitMarked != 0 } // isCheckmarked reports whether the heap bits have the checkmarked bit set. @@ -378,7 +402,7 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { bitp := h.bitp for i := uintptr(0); i < n; i += 4 { *bitp &^= bitPointerAll - bitp = subtractb(bitp, 1) + bitp = subtract1(bitp) } return } @@ -402,7 +426,7 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { bitp := h.bitp for i := uintptr(0); i < n; i += 4 { *bitp |= bitPointerAll - bitp = subtractb(bitp, 1) + bitp = subtract1(bitp) } } } @@ -449,7 +473,7 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { f(base + (i+3)*ptrSize) } *bitp = uint8(x) - bitp = subtractb(bitp, 1) + bitp = subtract1(bitp) } case size%(4*ptrSize) == 0: @@ -499,7 +523,7 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift) f(base + (i+1)*size) if size > 2*ptrSize { - *subtractb(bitp, 1) = 0 + *subtract1(bitp) = 0 } } *bitp = uint8(x) @@ -590,7 +614,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { unrollgcprog_m(typ) }) } - ptrmask = addb(ptrmask, 1) // skip the unroll flag byte + ptrmask = add1(ptrmask) // skip the unroll flag byte } // Heap bitmap bits for 2-word object are only 4 bits, @@ -687,7 +711,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { nb = typ.ptrdata / ptrSize for i := uintptr(0); i < nb; i += 8 { b |= uintptr(*p) << i - p = addb(p, 1) + p = add1(p) } nb = typ.size / ptrSize @@ -724,7 +748,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } if p != nil { b = uintptr(*p) - p = addb(p, 1) + p = add1(p) nb = 8 } @@ -776,7 +800,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { goto Phase3 } *hbitp = uint8(hb) - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) b >>= 4 nb -= 4 @@ -800,7 +824,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } else { atomicor8(hbitp, uint8(hb)) } - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) if w += 2; w >= nw { // We know that there is more data, because we handled 2-word objects above. // This must be at least a 6-word object. If we're out of pointer words, @@ -830,7 +854,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) b >>= 4 // Load more bits. b has nb right now. @@ -840,7 +864,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // and the next iteration will consume 8 bits, // leaving us with the same nb the next time we're here. b |= uintptr(*p) << nb - p = addb(p, 1) + p = add1(p) } else if p == nil { // Almost as fast path: track bit count and refill from pbits. // For short repetitions. @@ -856,7 +880,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { nb += endnb if nb < 8 { b |= uintptr(*ptrmask) << nb - p = addb(ptrmask, 1) + p = add1(ptrmask) } else { nb -= 8 p = ptrmask @@ -870,7 +894,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) b >>= 4 } @@ -891,11 +915,11 @@ Phase3: // The first is hb, the rest are zero. if w <= nw { *hbitp = uint8(hb) - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) hb = 0 // for possible final half-byte below for w += 4; w <= nw; w += 4 { *hbitp = 0 - hbitp = subtractb(hbitp, 1) + hbitp = subtract1(hbitp) } } @@ -1021,9 +1045,9 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace bool) *byte { throw("unrollgcprog: unknown instruction") case insData: - prog = addb(prog, 1) + prog = add1(prog) siz := int(*prog) - prog = addb(prog, 1) + prog = add1(prog) p := (*[1 << 30]byte)(unsafe.Pointer(prog)) for i := 0; i < siz; i++ { v := p[i/8] >> (uint(i) % 8) & 1