From a20fd1f6ba668ec0bd8c432d26def2b65cc6609a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 29 Apr 2016 14:51:48 -0400 Subject: [PATCH] runtime: reclaim scan/dead bit in first word MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit With the switch to separate mark bitmaps, the scan/dead bit for the first word of each object is now unused. Reclaim this bit and use it as a scan/dead bit, just like words three and on. The second word is still used for checkmark. This dramatically simplifies heapBitsSetTypeNoScan and hasPointers, since they no longer need different cases for 1, 2, and 3+ word objects. They can instead just manipulate the heap bitmap for the first word and be done with it. In order to enable this, we change heapBitsSetType and runGCProg to always set the scan/dead bit to scan for the first word on every code path. Since these functions only apply to types that have pointers, there's no need to do this conditionally: it's *always* necessary to set the scan bit in the first word. We also change every place that scans an object and checks if there are more pointers. Rather than only checking morePointers if the word is >= 2, we now check morePointers if word != 1 (since that's the checkmark word). Looking forward, we should probably reclaim the checkmark bit, too, but that's going to be quite a bit more work. Tested by setting doubleCheck in heapBitsSetType and running all.bash on both linux/amd64 and linux/386, and by running GOGC=10 all.bash. This particularly improves the FmtFprintf* go1 benchmarks, since they do a large amount of noscan allocation. name old time/op new time/op delta BinaryTree17-12 2.34s ± 1% 2.38s ± 1% +1.70% (p=0.000 n=17+19) Fannkuch11-12 2.09s ± 0% 2.09s ± 1% ~ (p=0.276 n=17+16) FmtFprintfEmpty-12 44.9ns ± 2% 44.8ns ± 2% ~ (p=0.340 n=19+18) FmtFprintfString-12 127ns ± 0% 125ns ± 0% -1.57% (p=0.000 n=16+15) FmtFprintfInt-12 128ns ± 0% 122ns ± 1% -4.45% (p=0.000 n=15+20) FmtFprintfIntInt-12 207ns ± 1% 193ns ± 0% -6.55% (p=0.000 n=19+14) FmtFprintfPrefixedInt-12 197ns ± 1% 191ns ± 0% -2.93% (p=0.000 n=17+18) FmtFprintfFloat-12 263ns ± 0% 248ns ± 1% -5.88% (p=0.000 n=15+19) FmtManyArgs-12 794ns ± 0% 779ns ± 1% -1.90% (p=0.000 n=18+18) GobDecode-12 7.14ms ± 2% 7.11ms ± 1% ~ (p=0.072 n=20+20) GobEncode-12 5.85ms ± 1% 5.82ms ± 1% -0.49% (p=0.000 n=20+20) Gzip-12 218ms ± 1% 215ms ± 1% -1.22% (p=0.000 n=19+19) Gunzip-12 36.8ms ± 0% 36.7ms ± 0% -0.18% (p=0.006 n=18+20) HTTPClientServer-12 77.1µs ± 4% 77.1µs ± 3% ~ (p=0.945 n=19+20) JSONEncode-12 15.6ms ± 1% 15.9ms ± 1% +1.68% (p=0.000 n=18+20) JSONDecode-12 55.2ms ± 1% 53.6ms ± 1% -2.93% (p=0.000 n=17+19) Mandelbrot200-12 4.05ms ± 1% 4.05ms ± 0% ~ (p=0.306 n=17+17) GoParse-12 3.14ms ± 1% 3.10ms ± 1% -1.31% (p=0.000 n=19+18) RegexpMatchEasy0_32-12 69.3ns ± 1% 70.0ns ± 0% +0.89% (p=0.000 n=19+17) RegexpMatchEasy0_1K-12 237ns ± 1% 236ns ± 0% -0.62% (p=0.000 n=19+16) RegexpMatchEasy1_32-12 69.5ns ± 1% 70.3ns ± 1% +1.14% (p=0.000 n=18+17) RegexpMatchEasy1_1K-12 377ns ± 1% 366ns ± 1% -3.03% (p=0.000 n=15+19) RegexpMatchMedium_32-12 107ns ± 1% 107ns ± 2% ~ (p=0.318 n=20+19) RegexpMatchMedium_1K-12 33.8µs ± 3% 33.5µs ± 1% -1.04% (p=0.001 n=20+19) RegexpMatchHard_32-12 1.68µs ± 1% 1.73µs ± 0% +2.50% (p=0.000 n=20+18) RegexpMatchHard_1K-12 50.8µs ± 1% 52.0µs ± 1% +2.50% (p=0.000 n=19+18) Revcomp-12 381ms ± 1% 385ms ± 1% +1.00% (p=0.000 n=17+18) Template-12 64.9ms ± 3% 62.6ms ± 1% -3.55% (p=0.000 n=19+18) TimeParse-12 324ns ± 0% 328ns ± 1% +1.25% (p=0.000 n=18+18) TimeFormat-12 345ns ± 0% 334ns ± 0% -3.31% (p=0.000 n=15+17) [Geo mean] 52.1µs 51.5µs -1.00% Change-Id: I13e74da3193a7f80794c654f944d1f0d60817049 Reviewed-on: https://go-review.googlesource.com/22632 Reviewed-by: Rick Hudson Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot --- src/reflect/all_test.go | 3 + src/runtime/cgocall.go | 2 +- src/runtime/gcinfo_test.go | 5 +- src/runtime/heapdump.go | 2 +- src/runtime/malloc.go | 2 +- src/runtime/mbitmap.go | 128 ++++++++++++++----------------------- src/runtime/mgcmark.go | 2 +- 7 files changed, 59 insertions(+), 85 deletions(-) diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index f8ffaae8e1..d4c3e4e588 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -5399,6 +5399,9 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) { for len(bits) > 2 && bits[len(bits)-1] == 0 { bits = bits[:len(bits)-1] } + if len(bits) == 2 && bits[0] == 0 && bits[1] == 0 { + bits = bits[:0] + } if !bytes.Equal(heapBits, bits) { t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits) } diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 887343edd1..6dceff09ef 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -559,7 +559,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { } n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { - if i >= 2*sys.PtrSize && !hbits.morePointers() { + if i != 1*sys.PtrSize && !hbits.morePointers() { // No more possible pointers. break } diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 9a61b4f2b2..011f005403 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -66,7 +66,7 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) { } func padDead(mask []byte) []byte { - // Because the dead bit isn't encoded until the third word, + // Because the dead bit isn't encoded in the second word, // and because on 32-bit systems a one-word allocation // uses a two-word block, the pointer info for a one-word // object needs to be expanded to include an extra scalar @@ -81,6 +81,9 @@ func trimDead(mask []byte) []byte { for len(mask) > 2 && mask[len(mask)-1] == typeScalar { mask = mask[:len(mask)-1] } + if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar { + mask = mask[:0] + } return mask } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 4afe663418..c317b5f969 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -714,7 +714,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { i := uintptr(0) hbits := heapBitsForAddr(p) for ; i < nptr; i++ { - if i >= 2 && !hbits.morePointers() { + if i != 1 && !hbits.morePointers() { break // end of object } if hbits.isPointer() { diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index c9cc82192d..bb17919fd0 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -700,7 +700,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { var scanSize uintptr if noscan { - heapBitsSetTypeNoScan(uintptr(x), size) + heapBitsSetTypeNoScan(uintptr(x)) } else { // If allocating a defer+arg block, now that we've picked a malloc size // large enough to hold everything, cut the "asked for" size down to diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 0bfb184945..8061e1d138 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -24,13 +24,15 @@ // In each 2-bit entry, the lower bit holds the same information as in the 1-bit // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. // The meaning of the high bit depends on the position of the word being described -// in its allocated object. In the first word, the high bit is unused. +// in its allocated object. In all words *except* the second word, the +// high bit indicates that the object is still being described. In +// these words, if a bit pair with a high bit 0 is encountered, the +// low bit can also be assumed to be 0, and the object description is +// over. This 00 is called the ``dead'' encoding: it signals that the +// rest of the words in the object are uninteresting to the garbage +// collector. +// // In the second word, the high bit is the GC ``checkmarked'' bit (see below). -// In the third and later words, the high bit indicates that the object is still -// being described. In these words, if a bit pair with a high bit 0 is encountered, -// the low bit can also be assumed to be 0, and the object description is over. -// This 00 is called the ``dead'' encoding: it signals that the rest of the words -// in the object are uninteresting to the garbage collector. // // The 2-bit entries are split when written into the byte, so that the top half // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) @@ -76,7 +78,7 @@ import ( const ( bitPointer = 1 << 0 - bitMarked = 1 << 4 + bitMarked = 1 << 4 // TODO: Rename bitScan. heapBitsShift = 1 // shift offset between successive bitPointer or bitMarked entries heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte @@ -490,7 +492,7 @@ func (h heapBits) bits() uint32 { // morePointers returns true if this word and all remaining words in this object // are scalars. -// h must not describe the first or second word of the object. +// h must not describe the second word of the object. func (h heapBits) morePointers() bool { return h.bits()&bitMarked != 0 } @@ -512,22 +514,7 @@ func (h heapBits) hasPointers(size uintptr) bool { if size == sys.PtrSize { // 1-word objects are always pointers return true } - // Otherwise, at least a 2-word object, and at least 2-word aligned, - // so h.shift is either 0 or 2, so we know we can get the bits for the - // first two words out of *h.bitp. - // If either of the first two words is a pointer, not pointer free. - b := uint32(*h.bitp >> h.shift) - if b&(bitPointer|bitPointer<>h.shift)&bitMarked != 0 } // isCheckmarked reports whether the heap bits have the checkmarked bit set. @@ -720,9 +707,9 @@ func typeBitsBulkBarrier(typ *_type, p, size uintptr) { // TODO(rsc): Perhaps introduce a different heapBitsSpan type. // initSpan initializes the heap bitmap for a span. -// It clears all mark and checkmark bits. +// It clears all checkmark bits. // If this is a span of pointer-sized objects, it initializes all -// words to pointer (and there are no dead bits). +// words to pointer/scan. // Otherwise, it initializes all words to scalar/dead. func (h heapBits) initSpan(s *mspan) { size, n, total := s.layout() @@ -745,7 +732,7 @@ func (h heapBits) initSpan(s *mspan) { end := h.bitp bitp := subtractb(end, nbyte-1) for { - *bitp = bitPointerAll + *bitp = bitPointerAll | bitMarkedAll if bitp == end { break } @@ -897,6 +884,9 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { if !h.isPointer() { throw("heapBitsSetType: pointer bit missing") } + if !h.morePointers() { + throw("heapBitsSetType: scan bit missing") + } } return } @@ -924,17 +914,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // unused second word. if gcphase == _GCoff { *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift - *h.bitp |= bitPointer << h.shift + *h.bitp |= (bitPointer | bitMarked) << h.shift } else { atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<= nw { goto Phase3 } @@ -1169,14 +1167,18 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // from a given span at a time, we should be able to set // these bits non-atomically. Not worth the risk right now. hb = (b & (bitPointer | bitPointer<>= 2 nb -= 2 - // Note: no bitMarker in hb because the first two words don't get markers from us. + // Note: no bitMarker for second word because that's + // the checkmark. if gcphase == _GCoff { - *hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) + *hbitp &^= uint8((bitPointer | bitMarked | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) } else { - atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<>(j%8))&1 != 0 { want |= bitPointer } - if i >= 2 { + if i != 1 { want |= bitMarked } else { have &^= bitMarked @@ -1366,39 +1368,11 @@ Phase4: } } -// heapBitsSetTypeNoScan marks x as noscan. For objects with 1 or 2 -// words set their bitPointers to off (0). -// All other objects have the first 3 bitPointers set to -// off (0) and the scan word in the third word -// also set to off (0). -func heapBitsSetTypeNoScan(x, size uintptr) { +// heapBitsSetTypeNoScan marks x as noscan by setting the first word +// of x in the heap bitmap to scalar/dead. +func heapBitsSetTypeNoScan(x uintptr) { h := heapBitsForAddr(uintptr(x)) - bitp := h.bitp - - if sys.PtrSize == 8 && size == sys.PtrSize { - // If this is truely noScan the tinyAlloc logic should have noticed - // and combined such objects. - throw("noscan object is too small") - } else if size%(4*sys.PtrSize) == 0 { - *bitp &^= bitPointer | bitPointer< 2*sys.PtrSize { - *bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift) - } - } else if h.shift == 2 { - *bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) - if size > 2*sys.PtrSize { - bitp = subtract1(bitp) - *bitp &^= bitPointer | bitMarked - } - } else { - throw("Type has unrecognized size") - } - } else { - throw("Type has unrecognized size") - } + *h.bitp &^= (bitPointer | bitMarked) << h.shift } var debugPtrmask struct { @@ -1804,12 +1778,6 @@ Run: dst = subtract1(dst) bits >>= 4 } - // Clear the mark bits in the first two entries. - // They are the actual mark and checkmark bits, - // not non-dead markers. It simplified the code - // above to set the marker in every bit written and - // then clear these two as a special case at the end. - *dstStart &^= bitMarked | bitMarked<= 2*sys.PtrSize && !hbits.morePointers() { + if i != 1*sys.PtrSize && !hbits.morePointers() { mask = mask[:i/sys.PtrSize] break } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8c8ce67fbf..af3205ab23 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1132,7 +1132,7 @@ func scanobject(b uintptr, gcw *gcWork) { // in the type bit for the one word. The only one-word objects // are pointers, or else they'd be merged with other non-pointer // data into larger allocations. - if i >= 2*sys.PtrSize && !hbits.morePointers() { + if i != 1*sys.PtrSize && !hbits.morePointers() { break // no more pointers in this object } if !hbits.isPointer() { -- 2.48.1