// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
-// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
+// in its allocated object. In the first word, the high bit is unused.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
// the low bit can also be assumed to be 0, and the object description is over.
// This 00 is called the ``dead'' encoding: it signals that the rest of the words
// in the object are uninteresting to the garbage collector.
//
// The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
+// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
+// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
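// For example, each bitmap byte describes a group of four heap words: the
// pointer bit for word i of the group (0 <= i < 4) is bit i and the
// corresponding high bit is bit 4+i (with bitPointer == 1 and bitMarked == 1<<4,
// as used below), so a byte whose low half is 0101 records pointers in words
// 0 and 2 of its group.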
//
// The code makes use of the fact that the zero value for a heap bitmap
-// has no live pointer bit set and is (depending on position), not marked,
+// has no live pointer bit set and is (depending on position) not used,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// It is still used in general, except that during the checkmark phase the type
// bit is repurposed as the checkmark bit and then reinitialized (to 1) as the
// type bit when checkmarking is finished.
+//
package runtime
func (s *mspan) markBitsForAddr(p uintptr) markBits {
byteOffset := p - s.base()
- markBitIndex := byteOffset / s.elemsize // TODO if hot spot use fancy divide....
- return s.markBitsForIndex(markBitIndex)
-}
-
-func (s *mspan) markBitsForIndex(markBitIndex uintptr) markBits {
+ markBitIndex := uintptr(0)
+ if byteOffset != 0 {
+ // markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication
+ markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+ }
whichByte := markBitIndex / 8
whichBit := markBitIndex % 8
return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex}
}
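
// A minimal sketch of the divide-by-multiplication identity used above,
// assuming shift/mul/shift2 values chosen the way the runtime's magic-divide
// setup chooses them; checkDivMagic is hypothetical and only illustrates that
// (n>>shift)*mul>>shift2 reproduces n/size for every object offset in a span.
func checkDivMagic(size, shift, mul, shift2, max uint64) bool {
	for n := uint64(0); n < max; n += size {
		if n/size != (n>>shift)*mul>>shift2 {
			return false
		}
	}
	return true
}
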
+func (s *mspan) markBitsForBase() markBits {
+ return markBits{&s.gcmarkBits[0], uint8(1), 0}
+}
+
// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
return *m.bytep&m.mask != 0
}
+// advance advances the markBits to the next object in the span.
+func (m *markBits) advance() {
+ if m.mask == 1<<7 {
+ m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
+ m.mask = 1
+ } else {
+ m.mask = m.mask << 1
+ }
+ m.index++
+}
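
// A minimal sketch of how these mark-bit helpers compose, using the span
// fields referenced elsewhere in this file; countMarked is hypothetical and
// simply walks one mark bit per object from the base of the span.
func countMarked(s *mspan) (marked uintptr) {
	mbits := s.markBitsForBase()
	for i := uintptr(0); i < s.nelems; i++ {
		if mbits.isMarked() {
			marked++
		}
		mbits.advance()
	}
	return marked
}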
+
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
return uint32(*h.bitp) >> (h.shift & 31)
}
-// isMarked reports whether the heap bits have the marked bit set.
-// h must describe the initial word of the object.
-func (h heapBits) isMarked() bool {
+// morePointers returns true if the object is still being described at this
+// word, that is, this word or a later word in the object may hold a pointer.
+// h must not describe the first or second word of the object.
+func (h heapBits) morePointers() bool {
return *h.bitp&(bitMarked<<h.shift) != 0
}
-// setMarked sets the marked bit in the heap bits, atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarked() {
- // Each byte of GC bitmap holds info for four words.
- // Might be racing with other updates, so use atomic update always.
- // We used to be clever here and use a non-atomic update in certain
- // cases, but it's not worth the risk.
- atomic.Or8(h.bitp, bitMarked<<h.shift)
-}
-
-// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarkedNonAtomic() {
- *h.bitp |= bitMarked << h.shift
-}
-
// isPointer reports whether the heap bits describe a pointer word.
// h must describe the initial word of the object.
//
}
}
-// heapBitsSweepSpan coordinates the sweeping of a span by reading
-// and updating the corresponding heap bitmap entries.
-// For each free object in the span, heapBitsSweepSpan sets the type
-// bits for the first four words (less for smaller objects) to scalar/dead
-// and then calls f(p), where p is the object's base address.
-// f is expected to add the object to a free list.
-// For non-free objects, heapBitsSweepSpan turns off the marked bit.
-func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
+// heapBitsSweepSpan coordinates the sweeping of a span and inspects
+// each freed object. If objects are being traced (allocfreetrace), if msan is
+// enabled, or if the span holds large (sizeclass 0) objects, then
+// heapBitsSweepSpan calls f(p), where p is the object's base address.
+// Otherwise heapBitsSweepSpan is lightweight.
+// heapBitsSweepSpan never alters the pointer/scalar heap bitmap; heap bitmap
+// maintenance is the responsibility of the allocation routines.
+// TODO(rlh): Deal with the checkmark bits by moving them
+// out of the heap bitmap, thus enabling bulk clearing.
+func heapBitsSweepSpan(s *mspan, f func(uintptr)) (nfree int) {
+ base := s.base()
+ size := s.elemsize
+ n := s.nelems
+ cl := s.sizeclass
+ doCall := debug.allocfreetrace != 0 || msanenabled || cl == 0
+
h := heapBitsForSpan(base)
switch {
default:
throw("heapBitsSweepSpan")
case sys.PtrSize == 8 && size == sys.PtrSize:
- // Consider mark bits in all four 2-bit entries of each bitmap byte.
- bitp := h.bitp
- for i := uintptr(0); i < n; i += 4 {
- x := uint32(*bitp)
- // Note that unlike the other size cases, we leave the pointer bits set here.
- // These are initialized during initSpan when the span is created and left
- // in place the whole time the span is used for pointer-sized objects.
- // That lets heapBitsSetType avoid an atomic update to set the pointer bit
- // during allocation.
- if x&bitMarked != 0 {
- x &^= bitMarked
- } else {
+ nfree = heapBitsSweep8BitPtrs(h, s, base, n, cl, doCall, f)
+ case size%(4*sys.PtrSize) == 0:
+ nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+ case size%(4*sys.PtrSize) == 2*sys.PtrSize:
+ nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+ }
+ return
+}
+
+func heapBitsSweep8BitPtrs(h heapBits, s *mspan, base, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+ mbits := s.markBitsForBase()
+ for i := uintptr(0); i < n; i += 4 {
+ // Note that unlike the other size cases, we leave the pointer bits set here.
+ // These are initialized during initSpan when the span is created and left
+ // in place the whole time the span is used for pointer-sized objects.
+ // That lets heapBitsSetType avoid an atomic update to set the pointer bit
+ // during allocation.
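+		// An object is freed here only if it is unmarked and was allocated:
+		// its index is below s.freeindex or its allocBits bit is set.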
+ if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+ if doCall {
f(base + i*sys.PtrSize)
}
- if x&(bitMarked<<heapBitsShift) != 0 {
- x &^= bitMarked << heapBitsShift
- } else {
+ if cl != 0 {
+ nfree++
+ }
+ }
+ mbits.advance()
+ if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+ if doCall {
f(base + (i+1)*sys.PtrSize)
}
- if x&(bitMarked<<(2*heapBitsShift)) != 0 {
- x &^= bitMarked << (2 * heapBitsShift)
- } else {
+ if cl != 0 {
+ nfree++
+ }
+ }
+ mbits.advance()
+ if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+ if doCall {
f(base + (i+2)*sys.PtrSize)
}
- if x&(bitMarked<<(3*heapBitsShift)) != 0 {
- x &^= bitMarked << (3 * heapBitsShift)
- } else {
+ if cl != 0 {
+ nfree++
+ }
+ }
+ mbits.advance()
+ if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+ if doCall {
f(base + (i+3)*sys.PtrSize)
}
- *bitp = uint8(x)
- bitp = subtract1(bitp)
+ if cl != 0 {
+ nfree++
+ }
}
+ mbits.advance()
+ }
+ return
+}
- case size%(4*sys.PtrSize) == 0:
- // Mark bit is in first word of each object.
- // Each object starts at bit 0 of a heap bitmap byte.
- bitp := h.bitp
- step := size / heapBitmapScale
- for i := uintptr(0); i < n; i++ {
- x := uint32(*bitp)
- if x&bitMarked != 0 {
- x &^= bitMarked
- } else {
- x = 0
- f(base + i*size)
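+// nextFreed reports whether there is another object being freed by this
+// sweep at or beyond m's current position, and advances m to it if so.
+// An object is being freed if it is unmarked and was allocated, that is,
+// its index is below s.freeindex or its allocBits bit is set.
+// It returns false once m.index reaches maxIndex.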
+func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool {
+ mByte := *m.bytep
+ for {
+ for mByte == 0xff {
+ if m.index >= maxIndex {
+ return false
}
- *bitp = uint8(x)
- bitp = subtractb(bitp, step)
+ m.index = (m.index + 8) &^ (8 - 1)
+ m.mask = 1
+ m.bytep = add1(m.bytep)
+ mByte = *m.bytep
}
-
- case size%(4*sys.PtrSize) == 2*sys.PtrSize:
- // Mark bit is in first word of each object,
- // but every other object starts halfway through a heap bitmap byte.
- // Unroll loop 2x to handle alternating shift count and step size.
- bitp := h.bitp
- step := size / heapBitmapScale
- var i uintptr
- for i = uintptr(0); i < n; i += 2 {
- x := uint32(*bitp)
- if x&bitMarked != 0 {
- x &^= bitMarked
- } else {
- x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
- f(base + i*size)
- if size > 2*sys.PtrSize {
- x = 0
+ if m.index >= maxIndex {
+ return false
+ }
+ for m.index < maxIndex {
+ if m.mask&mByte == 0 {
+ if m.index < s.freeindex {
+ return true
+ }
+ if s.allocBits[m.index/8]&m.mask != 0 {
+ return true
}
}
- *bitp = uint8(x)
- if i+1 >= n {
+ if m.mask == 1<<7 {
+ m.mask = 1
+ m.bytep = add1(m.bytep)
+ mByte = *m.bytep
+ m.index++
break
- }
- bitp = subtractb(bitp, step)
- x = uint32(*bitp)
- if x&(bitMarked<<(2*heapBitsShift)) != 0 {
- x &^= bitMarked << (2 * heapBitsShift)
} else {
- x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
- f(base + (i+1)*size)
- if size > 2*sys.PtrSize {
- *subtract1(bitp) = 0
- }
+ m.mask = m.mask << 1
+ m.index++
}
- *bitp = uint8(x)
- bitp = subtractb(bitp, step+1)
}
}
+ return false
+}
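
// A minimal sketch of the predicate nextFreed implements, using the same span
// fields; isBeingFreed is hypothetical and only restates the "unmarked and
// allocated" test: an object is freed by this sweep iff its mark bit is clear
// and it was allocated (index below s.freeindex or allocBits bit set).
func isBeingFreed(s *mspan, i uintptr) bool {
	mask := uint8(1 << (i % 8))
	marked := s.gcmarkBits[i/8]&mask != 0
	allocated := i < s.freeindex || s.allocBits[i/8]&mask != 0
	return !marked && allocated
}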
+
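+// heapBitsSweepMap walks the mark bits of span s and, for each object being
+// freed by this sweep, calls f (when doCall is set) and counts it in nfree
+// for spans with a nonzero size class.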
+func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+ twobits := s.markBitsForBase()
+ for twobits.nextFreed(n, s) {
+ if doCall {
+ f(base + twobits.index*size)
+ }
+ if cl != 0 {
+ nfree++
+ }
+ twobits.advance()
+ }
+ return
}
// heapBitsSetType records that the new allocation [x, x+size)
// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
// arbitrarily larger.
//
- // The checks for size == ptrSize and size == 2*ptrSize can therefore
+ // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
// assume that dataSize == size without checking it explicitly.
if sys.PtrSize == 8 && size == sys.PtrSize {
// (In general the number of instances of typ being allocated is
// dataSize/typ.size.)
if sys.PtrSize == 4 && dataSize == sys.PtrSize {
- // 1 pointer.
+ // 1 pointer object. On 32-bit machines clear the bit for the
+ // unused second word.
if gcphase == _GCoff {
+ *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= bitPointer << h.shift
} else {
+ atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, bitPointer<<h.shift)
}
} else {
}
return
}
- // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+ // Otherwise typ.size must be 2*sys.PtrSize,
+ // and typ.kind&kindGCProg == 0.
if doubleCheck {
if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
b := uint32(*ptrmask)
hb := b & 3
if gcphase == _GCoff {
+			// bitPointer == 1, bitMarked == 1<<4, heapBitsShift == 1.
+			// 110011 is shifted left by h.shift and used as an and-not mask,
+			// clearing the bits that are about to be ored into *h.bitp in
+			// the next instruction.
+ *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
} else {
+			// TODO(rlh): since the GC no longer sets the mark bits in the
+			// heap bitmap concurrently and malloc owns the span being
+			// allocated from, why does this have to be atomic?
+
+ atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, uint8(hb<<h.shift))
}
return
// Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than
// iterating by nb each time. (nb could be 1.)
- // Since we loaded typ.ptrdata/ptrSize bits
- // but are pretending to have typ.size/ptrSize,
+ // Since we loaded typ.ptrdata/sys.PtrSize bits
+ // but are pretending to have typ.size/sys.PtrSize,
// there might be no replication necessary/possible.
pbits = b
endnb = nb
// not with its mark bit. Since there is only one allocation
// from a given span at a time, we should be able to set
// these bits non-atomically. Not worth the risk right now.
- hb = (b & 3) << (2 * heapBitsShift)
+ hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
b >>= 2
nb -= 2
	// Note: no bitMarked bits in hb because the first two words don't get markers from us.
if gcphase == _GCoff {
+ *hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
*hbitp |= uint8(hb)
} else {
+ atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
atomic.Or8(hbitp, uint8(hb))
}
hbitp = subtract1(hbitp)
}
}
+// heapBitsSetTypeNoScan marks x as noscan. For 1- and 2-word objects it
+// clears their pointer bits. For larger objects it clears the pointer bits
+// of the first three words and also clears the scan (high) bit of the third
+// word, marking the rest of the object dead.
+func heapBitsSetTypeNoScan(x, size uintptr) {
+ h := heapBitsForAddr(uintptr(x))
+ bitp := h.bitp
+
+ if sys.PtrSize == 8 && size == sys.PtrSize {
+		// If this is truly noscan, the tinyAlloc logic should have noticed
+		// and combined such objects.
+ throw("noscan object is too small")
+ } else if size%(4*sys.PtrSize) == 0 {
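+		// With bitPointer == 1, bitMarked == 1<<4, and heapBitsShift == 1,
+		// this mask is 0b01000111: the pointer bits of words 0-2 plus the
+		// scan (high) bit of word 2, which marks the rest of the object dead.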
+ *bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
+ } else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
+ if h.shift == 0 {
+ *bitp &^= (bitPointer | bitPointer<<heapBitsShift)
+ if size > 2*sys.PtrSize {
+ *bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
+ }
+ } else if h.shift == 2 {
+ *bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
+ if size > 2*sys.PtrSize {
+ bitp = subtract1(bitp)
+ *bitp &^= bitPointer | bitMarked
+ }
+ } else {
+ throw("Type has unrecognized size")
+ }
+ } else {
+ throw("Type has unrecognized size")
+ }
+}
+
var debugPtrmask struct {
lock mutex
data *byte
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size the size of the region described by prog, in bytes.
-// The resulting bitvector will have no more than size/ptrSize bits.
+// The resulting bitvector will have no more than size/sys.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
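	// n is the number of bitmap bytes needed: one bit per pointer-sized word,
	// rounded up to a whole byte.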
n := (size/sys.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
// into a register and use that register for the entire loop
// instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler.
- // The cutoff is ptrSize*8 - 7 to guarantee that when we add
+ // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
if hbits.isPointer() {
mask[i/sys.PtrSize] = 1
}
- if i >= 2*sys.PtrSize && !hbits.isMarked() {
+ if i >= 2*sys.PtrSize && !hbits.morePointers() {
mask = mask[:i/sys.PtrSize]
break
}
c := _g_.m.mcache
freeToHeap := false
- // Mark any free objects in this span so we don't collect them.
- sstart := uintptr(s.start << _PageShift)
- for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
- if uintptr(link) < sstart || s.limit <= uintptr(link) {
- // Free list is corrupted.
- dumpFreeList(s)
- throw("free list corrupted")
- }
- heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
- }
+	// The allocBits indicate which unmarked objects don't need to be
+	// processed since they were free at the end of the last GC cycle
+	// and were not allocated since then.
+	// If an object's index is >= s.freeindex and its allocBits bit
+	// is not set, then the object has remained unallocated since
+	// the last GC.
+	// This situation is analogous to being on a freelist.
// Unlink & free special records for any objects we're about to free.
// Two complications here:
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
- hbits := heapBitsForAddr(p)
- if !hbits.isMarked() {
+ mbits := s.markBitsForAddr(p)
+ if !mbits.isMarked() {
// This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer.
hasFin := false
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer.
- hbits.setMarkedNonAtomic()
+ mbits.setMarkedNonAtomic()
hasFin = true
break
}
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
- size, n, _ := s.layout()
- heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
+ nfree = heapBitsSweepSpan(s, func(p uintptr) {
// At this point we know that we are looking at garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
} else if size > sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
}
- if head.ptr() == nil {
- head = gclinkptr(p)
- } else {
- end.ptr().next = gclinkptr(p)
- }
- end = gclinkptr(p)
- end.ptr().next = gclinkptr(0x0bade5)
- nfree++
}
})
+ wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
+
+ s.freeindex = 0 // reset allocation index to start of span.
+
+ // Swap role of allocBits with gcmarkBits
+ // Clear gcmarkBits in preparation for next GC
+ s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
+ s.clearGCMarkBits() // prepare for next GC
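+	// After the swap, allocBits holds the mark bits from the cycle that just
+	// finished (the set of live objects), and gcmarkBits is all clear for
+	// the next cycle.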
+
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep")
}
+	// Serialization point.
+	// At this point the mark bits are cleared and the span is ready
+	// for allocation, so release it.
atomic.Store(&s.sweepgen, sweepgen)
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
- res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
+ res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// Free large span to heap
throw("spanBytesAlloc underflow")
}
}
-
-func dumpFreeList(s *mspan) {
- printlock()
- print("runtime: free list of span ", s, ":\n")
- sstart := uintptr(s.start << _PageShift)
- link := s.freelist
- for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
- if i != 0 {
- print(" -> ")
- }
- print(hex(link))
- if link.ptr() == nil {
- break
- }
- if uintptr(link) < sstart || s.limit <= uintptr(link) {
- // Bad link. Stop walking before we crash.
- print(" (BAD)")
- break
- }
- link = link.ptr().next
- }
- print("\n")
- printunlock()
-}