From e375ca2a25ba08806e2dbc060a79ef79849d688e Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 7 May 2015 11:03:17 -0400 Subject: [PATCH] runtime: reorder bits in heap bitmap bytes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The runtime deals with 1-bit pointer bitmaps and 2-bit heap bitmaps that have entries for both pointers and mark bits. Each byte in a 1-bit pointer bitmap looks like pppppppp (all pointer bits). Each byte in a 2-bit heap bitmap looks like mpmpmpmp (mark, pointer, ...). This means that when converting from 1-bit to 2-bit, as we do during malloc, we have to pick up 4 bits in pppp form and use shifts to create the mpmpmpmp form. This CL changes the 2-bit heap bitmap form to mmmmpppp, so that 4 bits picked up in 1-bit form can be used directly in the low bits of the heap bitmap byte, without expansion. This simplifies the code, and it also happens to be faster. name old mean new mean delta SetTypePtr 14.0ns × (0.98,1.09) 14.0ns × (0.98,1.08) ~ (p=0.966) SetTypePtr8 16.5ns × (0.99,1.05) 15.3ns × (0.96,1.16) -6.86% (p=0.012) SetTypePtr16 21.3ns × (0.98,1.05) 18.8ns × (0.94,1.14) -11.49% (p=0.000) SetTypePtr32 34.6ns × (0.93,1.22) 27.7ns × (0.91,1.26) -20.08% (p=0.001) SetTypePtr64 55.7ns × (0.97,1.11) 41.6ns × (0.98,1.04) -25.30% (p=0.000) SetTypePtr126 98.0ns × (1.00,1.00) 67.7ns × (0.99,1.05) -30.88% (p=0.000) SetTypePtr128 98.6ns × (1.00,1.01) 68.6ns × (0.99,1.03) -30.44% (p=0.000) SetTypePtrSlice 781ns × (0.99,1.01) 571ns × (0.99,1.04) -26.93% (p=0.000) SetTypeNode1 13.1ns × (0.99,1.01) 12.1ns × (0.99,1.01) -7.45% (p=0.000) SetTypeNode1Slice 113ns × (0.99,1.01) 94ns × (1.00,1.00) -16.35% (p=0.000) SetTypeNode8 32.7ns × (1.00,1.00) 29.8ns × (0.99,1.01) -8.97% (p=0.000) SetTypeNode8Slice 266ns × (1.00,1.00) 204ns × (1.00,1.00) -23.40% (p=0.000) SetTypeNode64 58.0ns × (0.98,1.08) 42.8ns × (1.00,1.01) -26.24% (p=0.000) SetTypeNode64Slice 1.55µs × (0.99,1.02) 0.96µs × (1.00,1.00) -37.84% (p=0.000) SetTypeNode64Dead 13.1ns × (0.99,1.01) 12.1ns × (1.00,1.00) -7.33% (p=0.000) SetTypeNode64DeadSlice 1.52µs × (1.00,1.01) 1.08µs × (1.00,1.01) -28.95% (p=0.000) SetTypeNode124 97.9ns × (1.00,1.00) 67.1ns × (1.00,1.01) -31.49% (p=0.000) SetTypeNode124Slice 2.87µs × (0.99,1.02) 1.75µs × (1.00,1.01) -39.15% (p=0.000) SetTypeNode126 98.4ns × (1.00,1.01) 68.1ns × (1.00,1.01) -30.79% (p=0.000) SetTypeNode126Slice 2.91µs × (0.99,1.01) 1.77µs × (0.99,1.01) -39.09% (p=0.000) SetTypeNode1024 732ns × (1.00,1.00) 511ns × (0.87,1.42) -30.14% (p=0.000) SetTypeNode1024Slice 23.1µs × (1.00,1.00) 13.9µs × (0.99,1.02) -39.83% (p=0.000) Change-Id: I12e3b850a4e6fa6c8146b8635ff728f3ef658819 Reviewed-on: https://go-review.googlesource.com/9828 Reviewed-by: Austin Clements --- src/runtime/mbitmap.go | 110 ++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 61e1254bed..234aa9509a 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -8,7 +8,8 @@ // // Stack frames and global variables in the data and bss sections are described // by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer -// to be visited during GC. +// to be visited during GC. The bits in each byte are consumed starting with +// the low bit: 1<<0, 1<<1, and so on. // // Heap bitmap // @@ -19,8 +20,6 @@ // That is, the byte at address start-1 holds the 2-bit entries for the four words // start through start+3*ptrSize, the byte at start-2 holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. -// In each byte, the low 2 bits describe the first word, the next 2 bits describe -// the next word, and so on. // // In each 2-bit entry, the lower bit holds the same information as in the 1-bit // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. @@ -33,6 +32,11 @@ // This 00 is called the ``dead'' encoding: it signals that the rest of the words // in the object are uninteresting to the garbage collector. // +// The 2-bit entries are split when written into the byte, so that the top half +// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits. +// This form allows a copy from the 1-bit to the 4-bit form to keep the +// pointer bits contiguous, instead of having to space them out. +// // The code makes use of the fact that the zero value for a heap bitmap // has no live pointer bit set and is (depending on position), not marked, // not checkmarked, and is the dead encoding. @@ -65,11 +69,15 @@ package runtime import "unsafe" const ( - bitPointer = 1 - bitMarked = 2 + bitPointer = 1 << 0 + bitMarked = 1 << 4 + + heapBitsShift = 1 // shift offset between successive bitPointer or bitMarked entries + heapBitmapScale = ptrSize * (8 / 2) // number of data bytes described by one heap bitmap byte - heapBitsWidth = 2 // heap bitmap bits to describe one pointer - heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes described by one heap bitmap byte + // all mark/pointer bits in a byte + bitMarkedAll = bitMarked | bitMarked<> h.shift) - if b&(bitPointer|bitPointer<>(heapBitsWidth+h.shift))&bitMarked != 0 + return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0 } // setCheckmarked sets the checkmarked bit. @@ -307,7 +315,7 @@ func (h heapBits) setCheckmarked(size uintptr) { atomicor8(h.bitp, bitPointer< 2*ptrSize { x = 0 @@ -454,10 +462,10 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { } bitp = subtractb(bitp, step) x = uint32(*bitp) - if x&(bitMarked<<4) != 0 { - x &^= bitMarked << 4 + if x&(bitMarked<<(2*heapBitsShift)) != 0 { + x &^= bitMarked << (2 * heapBitsShift) } else { - x &^= 0xf0 + x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift) f(base + (i+1)*size) if size > 2*ptrSize { *subtractb(bitp, 1) = 0 @@ -554,12 +562,12 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { if size == 2*ptrSize { if typ.size == ptrSize { // 2-element slice of pointer. - atomicor8(h.bitp, (bitPointer|bitPointer<= nw { goto Phase3 } @@ -724,7 +732,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { b >>= 4 nb -= 4 - case ptrSize == 8 && h.shift == 4: + case ptrSize == 8 && h.shift == 2: // Ptrmask and heap bitmap are misaligned. // The bits for the first two words are in a byte shared with another object // and must be updated atomically. @@ -735,7 +743,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // not with its mark bit. Since there is only one allocation // from a given span at a time, we should be able to set // these bits non-atomically. Not worth the risk right now. - hb = (b&1)<<4 | (b&2)<<(4+heapBitsWidth-1) // bits being prepared for *h.bitp + hb = (b & 3) << (2 * heapBitsShift) b >>= 2 nb -= 2 // Note: no bitMarker in hb because the first two words don't get markers from us. @@ -763,8 +771,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // if w+4 >= nw, then b has only nw-w bits, // but we'll stop at the break and then truncate // appropriately in Phase 3. - hb = b&1 | (b&2)<<(heapBitsWidth-1) | (b&4)<<(2*heapBitsWidth-2) | (b&8)<<(3*heapBitsWidth-3) - hb |= bitMarked | bitMarked<= nw { break } @@ -803,8 +811,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } // Emit bitmap byte. - hb = b&1 | (b&2)<<(heapBitsWidth-1) | (b&4)<<(2*heapBitsWidth-2) | (b&8)<<(3*heapBitsWidth-3) - hb |= bitMarked | bitMarked<= nw { break } @@ -822,15 +830,16 @@ Phase3: // then we must write a ``dead'' entry to the next bitmap byte. if frag := (nw - w) % 4; frag != 0 { // Data ends at least one word early. - hb &= 1<<(heapBitsWidth*frag) - 1 + mask := uintptr(1)<= size { break } - have = (*h.bitp >> h.shift) & 3 + have = (*h.bitp >> h.shift) & (bitPointer | bitMarked) if i == dataSize/ptrSize || i/ndata == count-1 && j >= nptr { want = 0 // dead marker } else { @@ -883,8 +892,9 @@ Phase4: println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") - h = heapBitsForAddr(x) - print("initial bits h.bitp=", h.bitp, " h.shift=", h.shift, "\n") + h0 := heapBitsForAddr(x) + print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") + print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") println("at word", i, "offset", i*ptrSize, "have", have, "want", want) throw("bad heapBitsSetType") -- 2.48.1