}
// A temporary stand-in for the count trailing zero (ctz) instruction.
-func ctz(markBits byte) uint8 {
- tz := uint8(0) // trailing zero count.
+// The x86 BSF instruction operates on a non-zero 64-bit word.
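+// For example, ctz64(0x14) == 2: 0x14 is 10100 in binary, with two trailing zeros.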
+func ctz64(markBits uint64) uint64 {
if markBits == 0 {
- return 8 // 8
+ return 64 // bits in a 64-bit word; also ensures the loop below terminates
}
- for mask := byte(1); mask&markBits == 0; mask, tz = mask<<1, tz+1 {
+ // tz holds trailing zero count.
+ tz := uint64(0)
+ for mask := uint64(1); mask&markBits == 0; mask, tz = mask<<1, tz+1 {
}
return tz
}
-// nextFreeIndex returns the index of the next free object in s at or
-// after the index'th object.
+// refillAllocCache takes the 8 bytes of s.allocBits starting at whichByte
+// and inverts them so that ctz (count trailing zeros) instructions
+// can be used. It then places these 8 bytes into the cached 64-bit
+// s.allocCache.
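+// For example, if all 8 bytes are 0xff (every object allocated), the
+// inverted cache is 0 and ctz64 reports no free slot within it.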
+func (s *mspan) refillAllocCache(whichByte uintptr) {
+ bytes := s.allocBits[whichByte : whichByte+8]
+ aCache := uint64(0)
+ aCache |= uint64(bytes[0])
+ aCache |= uint64(bytes[1]) << (1 * 8)
+ aCache |= uint64(bytes[2]) << (2 * 8)
+ aCache |= uint64(bytes[3]) << (3 * 8)
+ aCache |= uint64(bytes[4]) << (4 * 8)
+ aCache |= uint64(bytes[5]) << (5 * 8)
+ aCache |= uint64(bytes[6]) << (6 * 8)
+ aCache |= uint64(bytes[7]) << (7 * 8)
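+ // aCache now holds the 8 bytes in little-endian order; the
+ // complement below turns each free object's bit into a 1.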
+ s.allocCache = ^aCache
+}
+
+// nextFreeIndex returns the index of the next free object in s at
+// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
-func (s *mspan) nextFreeIndex(index uintptr) uintptr {
- if index == s.nelems {
- return index
- }
- if index > s.nelems {
- throw("index > s.nelems")
- }
- whichByte := index / 8
- theByte := s.allocBits[whichByte]
-
- theBitMask := uint8(1<<(index%8) - 1)
- // theBitMask holds a 1 for every bit < index which have already been allocated.
- // Flip the masked marked bits so 1 means a free bit.
- theByte = ^(theByte | theBitMask)
- tz := ctz(theByte)
- if tz != 8 {
- result := uintptr(tz) + whichByte*8
- if result >= s.nelems {
- return s.nelems
- }
- return result
- }
- whichByte++
- index = (whichByte) * 8
- for ; index < s.nelems; index += 8 {
- theByte = ^s.allocBits[whichByte]
- tz = ctz(theByte)
- if tz != 8 {
- result := uintptr(tz) + whichByte*8
- if result >= s.nelems {
- return s.nelems
- }
- return result
+func (s *mspan) nextFreeIndex() uintptr {
+ if s.freeindex == s.nelems {
+ return s.freeindex
+ }
+ if s.freeindex > s.nelems {
+ throw("s.freeindex > s.nelems")
+ }
+
+ aCache := s.allocCache
+ bitIndex := ctz64(aCache)
+ for bitIndex == 64 {
+ // Move index to start of next cached bits.
+ s.freeindex = (s.freeindex + 64) &^ (64 - 1)
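+ // (x + 64) &^ (64 - 1) rounds x up to the next multiple of 64.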
+ if s.freeindex >= s.nelems {
+ s.freeindex = s.nelems
+ return s.freeindex
}
- whichByte++
- }
- return s.nelems
+ whichByte := s.freeindex / 8
+ // Refill s.allocCache with the next 64 alloc bits.
+ // Unlike in allocBits, a 1 in s.allocCache means
+ // the object is free.
+ s.refillAllocCache(whichByte)
+ aCache = s.allocCache
+ bitIndex = ctz64(aCache)
+ // Nothing was available; try again now that allocCache has been refilled.
+ }
+ result := s.freeindex + uintptr(bitIndex)
+ if result >= s.nelems {
+ s.freeindex = s.nelems
+ return s.freeindex
+ }
+ s.allocCache >>= bitIndex + 1
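+ // The shift discards the just-used bit and all bits below it, so the
+ // low bit again corresponds to s.freeindex once it is advanced below.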
+ s.freeindex = result + 1
+
+ if s.freeindex%64 == 0 && s.freeindex != s.nelems {
+ // We just incremented s.freeindex so it isn't 0.
+ // As each 1 in s.allocCache was encountered and used for allocation
+ // it was shifted away. At this point s.allocCache contains all 0s.
+ // Refill s.allocCache so that it corresponds
+ // to the bits at s.allocBits starting at s.freeindex.
+ whichByte := s.freeindex / 8
+ s.refillAllocCache(whichByte)
+ }
+ return result
}
func (s *mspan) isFree(index uintptr) bool {
s.allocBits = &s.markbits1
s.gcmarkBits = &s.markbits2
s.freeindex = 0
+ s.allocCache = ^uint64(0) // all 1s indicating all free.
s.nelems = n
s.clearAllocBits()
s.clearGCMarkBits()
n := s.nelems
cl := s.sizeclass
doCall := debug.allocfreetrace != 0 || msanenabled || cl == 0
-
h := heapBitsForSpan(base)
switch {
default:
func heapBitsSweep8BitPtrs(h heapBits, s *mspan, base, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
mbits := s.markBitsForBase()
- for i := uintptr(0); i < n; i += 4 {
+ if cl == 0 {
+ throw("8BitPtrs are not in cl 0")
+ }
+ // Consider the mark bit of each object in the span.
+ for i := uintptr(0); i < n; i++ {
// Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation.
- if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
- if doCall {
+ if !mbits.isMarked() {
+ nfree++
+ if mbits.index < s.freeindex {
+ f(base + i*sys.PtrSize)
+ } else if s.allocBits[mbits.index/8]&mbits.mask != 0 {
+ // The object was marked in the previous cycle but not in this one.
+ // If it had not been marked in the previous cycle, the call would be redundant.
f(base + i*sys.PtrSize)
- }
- if cl != 0 {
- nfree++
- }
- }
- mbits.advance()
- if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
- if doCall {
- f(base + (i+1)*sys.PtrSize)
- }
- if cl != 0 {
- nfree++
- }
- }
- mbits.advance()
- if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
- if doCall {
- f(base + (i+2)*sys.PtrSize)
- }
- if cl != 0 {
- nfree++
- }
- }
- mbits.advance()
- if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
- if doCall {
- f(base + (i+3)*sys.PtrSize)
- }
- if cl != 0 {
- nfree++
}
}
mbits.advance()
}
- return
+ return nfree
}
-func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool {
+// nextFreed returns true if there is another object that was freed
+// during this GC cycle. An object is free if its mark bit is not set.
+// If its index is < s.freeindex, the object was freed during this GC cycle.
+// If its index is >= s.freeindex, it was freed during this cycle only
+// if its allocBit is set; if the allocBit is 0 it was freed during a
+// previous cycle and is not considered newly freed.
+func (m *markBits) nextFreed(nelems uintptr, s *mspan, totalFree *int) bool {
mByte := *m.bytep
for {
for mByte == 0xff {
- if m.index >= maxIndex {
+ if m.index >= nelems {
return false
}
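+ // Round m.index up to the start of the next mark byte.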
m.index = (m.index + 8) &^ (8 - 1)
m.mask = 1
m.bytep = add1(m.bytep)
mByte = *m.bytep
+ // Nothing free was found; totalFree remains the same.
}
- if m.index >= maxIndex {
+ if m.index >= nelems {
return false
}
- for m.index < maxIndex {
+ for m.index < nelems {
if m.mask&mByte == 0 {
+ // At this point we have a free object, so update totalFree.
+ *totalFree++
if m.index < s.freeindex {
return true
}
return false
}
-func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) int {
+ totalFree := 0
twobits := s.markBitsForBase()
- for twobits.nextFreed(n, s) {
+ for twobits.nextFreed(n, s, &totalFree) {
if doCall {
f(base + twobits.index*size)
}
- if cl != 0 {
- nfree++
- }
twobits.advance()
}
- return
+ return totalFree
}
// heapBitsSetType records that the new allocation [x, x+size)
c.empty.insertBack(s)
unlock(&c.lock)
s.sweep(true)
- freeIndex := s.nextFreeIndex(0)
+ freeIndex := s.nextFreeIndex()
if freeIndex != s.nelems {
s.freeindex = freeIndex
goto havespan
havespan:
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.allocCount)
- if n == 0 {
+ if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
throw("span has no free objects")
}
usedBytes := uintptr(s.allocCount) * s.elemsize
gcController.revise()
}
s.incache = true
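+ // freeByteBase rounds s.freeindex down to a 64-bit cache boundary;
+ // dividing by 8 converts that bit index to a byte offset into s.allocBits.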
+ freeByteBase := s.freeindex &^ (64 - 1)
+ whichByte := freeByteBase / 8
+ // Init alloc bits cache.
+ s.refillAllocCache(whichByte)
+
+ // Adjust the allocCache so that s.freeindex corresponds to the low bit in
+ // s.allocCache.
+ s.allocCache >>= s.freeindex % 64
+
return s
}
unlock(&c.lock)
}
-// Free n objects from a span s back into the central free list c.
-// Called during sweep.
-// Returns true if the span was returned to heap. Sets sweepgen to
-// the latest generation.
-// If preserve=true, don't return the span to heap nor relink in MCentral lists;
-// caller takes care of it.
-func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
+// freeSpan updates c and s after sweeping s.
+// It sets s's sweepgen to the latest generation,
+// and, based on the number of free objects in s,
+// moves s to the appropriate list of c or returns it
+// to the heap.
+// freeSpan returns true if s was returned to the heap.
+// If preserve=true, it does not move s (the caller
+// must take care of it).
+func (c *mcentral) freeSpan(s *mspan, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
if s.incache {
throw("freeSpan given cached span")
}
- s.allocCount -= uint16(n)
-
if preserve {
// preserve is set only when called from MCentral_CacheSpan above,
// the span must be in the empty list.
// the block bitmap without atomic operations.
nfree = heapBitsSweepSpan(s, func(p uintptr) {
- // At this point we know that we are looking at garbage object
+ // At this point we know that we are looking at a garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(p), size)
}
}
})
-
- wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
+ s.allocCount = uint16(s.nelems) - uint16(nfree)
+ wasempty := s.nextFreeIndex() == s.nelems
s.freeindex = 0 // reset allocation index to start of span.
// Clear gcmarkBits in preparation for next GC
s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
s.clearGCMarkBits() // prepare for next GC
+ // Initialize alloc bits cache.
+ s.refillAllocCache(0)
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
}
- if nfree > 0 {
+
+ if nfree > 0 && cl != 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
- res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
+ res = mheap_.central[cl].mcentral.freeSpan(s, head, end, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// Free large span to heap
// undefined and should never be referenced.
//
// Object n starts at address n*elemsize + (start << pageShift).
- freeindex uintptr
+ freeindex uintptr
+
+ // Cache of the allocBits at freeindex. allocCache is shifted
+ // such that the lowest bit corresponds to the bit at freeindex.
+ // allocCache holds the complement of allocBits, thus allowing
+ // ctz64 (count trailing zero) to use it directly.
+ // allocCache may contain bits beyond s.nelems; the caller must ignore
+ // these.
+ allocCache uint64
allocBits *[maxObjsPerSpan / 8]uint8
gcmarkBits *[maxObjsPerSpan / 8]uint8
nelems uintptr // number of objects in the span.
func (list *mSpanList) remove(span *mspan) {
if span.prev == nil || span.list != list {
- println("failed MSpanList_Remove", span, span.prev, span.list, list)
+ println("runtime: failed MSpanList_Remove", span, span.prev, span.list, list)
throw("MSpanList_Remove")
}
if span.next != nil {
func (list *mSpanList) insert(span *mspan) {
if span.next != nil || span.prev != nil || span.list != nil {
- println("failed MSpanList_Insert", span, span.next, span.prev, span.list)
+ println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list)
throw("MSpanList_Insert")
}
span.next = list.first