From 3479b065d43f2990ac12e7b00ddff6f63a876ca9 Mon Sep 17 00:00:00 2001
From: Rick Hudson
Date: Thu, 11 Feb 2016 13:57:58 -0500
Subject: [PATCH] [dev.garbage] runtime: allocate directly from GC mark bits

Instead of building a freelist from the mark bits generated by the GC,
this CL allocates directly from the mark bits.

The approach moves the mark bits from the pointer/no-pointer heap
structures into their own per-span data structures. The mark/allocation
vectors consist of a single mark bit per object. Two vectors are
maintained: one for allocation and one for the GC's mark phase. During
the GC cycle's sweep phase the interpretation of the vectors is swapped:
the mark vector becomes the allocation vector, and the old allocation
vector is cleared and becomes the mark vector that the next GC cycle
will use.

Marked entries in the allocation vector indicate that the object is not
free. Each allocation vector maintains a boundary between the area of
the span already allocated from and the area not yet allocated from. As
objects are allocated this boundary advances until it reaches the end
of the span, at which point further allocations are done from another
span.

Since we no longer sweep a span inspecting each freed object, the
responsibility for maintaining the pointer/scalar bits in the heap
bitmap now falls to the routines doing the actual allocation.

This CL is functionally complete and ready for performance tuning.

Change-Id: I336e0fc21eef1066e0b68c7067cc71b9f3d50e04
Reviewed-on: https://go-review.googlesource.com/19470
Reviewed-by: Austin Clements
---
 src/runtime/heapdump.go |  10 +-
 src/runtime/malloc.go   |  36 +++--
 src/runtime/mbitmap.go  | 306 +++++++++++++++++++++++++---------------
 src/runtime/mcache.go   |  13 +-
 src/runtime/mcentral.go |  35 ++---
 src/runtime/mgcmark.go  |  10 +-
 src/runtime/mgcsweep.go |  72 ++++------
 src/runtime/mheap.go    |  16 ++-
 src/runtime/stack.go    |   1 -
 9 files changed, 281 insertions(+), 218 deletions(-)

diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index e6a41f7f97..96dd6ff867 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -472,9 +472,13 @@ func dumpobjs() {
 		if n > uintptr(len(freemark)) {
 			throw("freemark array doesn't have enough entries")
 		}
-		for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
-			freemark[(uintptr(l)-p)/size] = true
+
+		for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
+			if s.isFree(freeIndex) {
+				freemark[freeIndex] = true
+			}
 		}
+
 		for j := uintptr(0); j < n; j, p = j+1, p+size {
 			if freemark[j] {
 				freemark[j] = false
@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
 	i := uintptr(0)
 	hbits := heapBitsForAddr(p)
 	for ; i < nptr; i++ {
-		if i >= 2 && !hbits.isMarked() {
+		if i >= 2 && !hbits.morePointers() {
 			break // end of object
 		}
 		if hbits.isPointer() {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 528a5b73ba..e635682cae 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -502,23 +502,34 @@ const (
 // weight allocation. If it is a heavy weight allocation the caller must
 // determine whether a new GC cycle needs to be started or if the GC is active
 // whether this goroutine needs to assist the GC.
-// https://golang.org/cl/5350 motivates why this routine should preform a
-// prefetch.
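// Sketch: the new nextFree below relies on s.nextFreeIndex, whose body does
// not appear in this patch excerpt. The helper here is a hypothetical,
// simplified illustration of such a scan over an allocation bit vector
// (allocBits, freeindex and nelems mirror the mspan fields used in this
// patch); it is not the CL's actual implementation.

// nextFreeIndexSketch returns the first index at or after start whose bit in
// allocBits is clear, or nelems if every remaining slot is taken. A clear bit
// means the slot was not marked by the last GC and has not been handed out
// since, so it is free to allocate.
func nextFreeIndexSketch(allocBits []uint8, start, nelems uintptr) uintptr {
	for i := start; i < nelems; i++ {
		if allocBits[i/8]&(1<<(i%8)) == 0 {
			return i
		}
	}
	return nelems // span is full; the caller must refill from another span
}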
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) { s := c.alloc[sizeclass] - v = s.freelist - if v.ptr() == nil { + shouldhelpgc = false + freeIndex := s.nextFreeIndex(s.freeindex) + + if freeIndex == s.nelems { + // The span is full. + if uintptr(s.ref) != s.nelems { + throw("s.ref != s.nelems && freeIndex == s.nelems") + } systemstack(func() { c.refill(int32(sizeclass)) }) shouldhelpgc = true s = c.alloc[sizeclass] - v = s.freelist + freeIndex = s.nextFreeIndex(s.freeindex) + } + if freeIndex >= s.nelems { + throw("freeIndex is not valid") } - s.freelist = v.ptr().next + + v = gclinkptr(freeIndex*s.elemsize + s.base()) + // Advance the freeIndex. + s.freeindex = freeIndex + 1 s.ref++ - // prefetchnta offers best performance, see change list message. - prefetchnta(uintptr(v.ptr().next)) + if uintptr(s.ref) > s.nelems { + throw("s.ref > s.nelems") + } return } @@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { v, shouldhelpgc = c.nextFree(sizeclass) x = unsafe.Pointer(v) if flags&flagNoZero == 0 { - v.ptr().next = 0 - if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 { - memclr(unsafe.Pointer(v), size) - } + memclr(unsafe.Pointer(v), size) + // TODO:(rlh) Only clear if object is not known to be zeroed. } } } else { @@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { systemstack(func() { s = largeAlloc(size, flags) }) + s.freeindex = 1 x = unsafe.Pointer(uintptr(s.start << pageShift)) size = s.elemsize } if flags&flagNoScan != 0 { - // All objects are pre-marked as noscan. Nothing to do. + heapBitsSetTypeNoScan(uintptr(x), size) } else { // If allocating a defer+arg block, now that we've picked a malloc size // large enough to hold everything, cut the "asked for" size down to diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index a78efdc034..10446fee42 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -24,7 +24,7 @@ // In each 2-bit entry, the lower bit holds the same information as in the 1-bit // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. // The meaning of the high bit depends on the position of the word being described -// in its allocated object. In the first word, the high bit is the GC ``marked'' bit. +// in its allocated object. In the first word, the high bit is unused. // In the second word, the high bit is the GC ``checkmarked'' bit (see below). // In the third and later words, the high bit indicates that the object is still // being described. In these words, if a bit pair with a high bit 0 is encountered, @@ -33,12 +33,13 @@ // in the object are uninteresting to the garbage collector. // // The 2-bit entries are split when written into the byte, so that the top half -// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits. +// of the byte contains 4 high bits and the bottom half contains 4 low (pointer) +// bits. // This form allows a copy from the 1-bit to the 4-bit form to keep the // pointer bits contiguous, instead of having to space them out. // // The code makes use of the fact that the zero value for a heap bitmap -// has no live pointer bit set and is (depending on position), not marked, +// has no live pointer bit set and is (depending on position), not used, // not checkmarked, and is the dead encoding. // These properties must be preserved when modifying the encoding. 
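// A toy model of the split-byte layout described above (not the runtime's
// declarations): one heap bitmap byte describes four heap words; word j's
// low (pointer) bit sits in the bottom nibble at bit j and its high bit in
// the top nibble at bit j+4. The constant and function names below are
// invented for the illustration.
const (
	toyBitPointer = 1 << 0 // low-nibble bit for word 0
	toyBitHigh    = 1 << 4 // high-nibble bit for word 0
)

// toyWordBits extracts the (pointer, high) pair for word j, 0 <= j <= 3,
// from one bitmap byte b.
func toyWordBits(b uint8, j uint) (pointer, high bool) {
	return b&(toyBitPointer<<j) != 0, b&(toyBitHigh<<j) != 0
}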
// @@ -63,6 +64,7 @@ // It is still used in general, except in checkmark the type bit is repurposed // as the checkmark bit and then reinitialized (to 1) as the type bit when // finished. +// package runtime @@ -254,16 +256,20 @@ func markBitsForAddr(p uintptr) markBits { func (s *mspan) markBitsForAddr(p uintptr) markBits { byteOffset := p - s.base() - markBitIndex := byteOffset / s.elemsize // TODO if hot spot use fancy divide.... - return s.markBitsForIndex(markBitIndex) -} - -func (s *mspan) markBitsForIndex(markBitIndex uintptr) markBits { + markBitIndex := uintptr(0) + if byteOffset != 0 { + // markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication + markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2) + } whichByte := markBitIndex / 8 whichBit := markBitIndex % 8 return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex} } +func (s *mspan) markBitsForBase() markBits { + return markBits{&s.gcmarkBits[0], uint8(1), 0} +} + // isMarked reports whether mark bit m is set. func (m markBits) isMarked() bool { return *m.bytep&m.mask != 0 @@ -307,6 +313,17 @@ func markBitsForSpan(base uintptr) (mbits markBits) { return mbits } +// advance advances the markBits to the next object in the span. +func (m *markBits) advance() { + if m.mask == 1<<7 { + m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1)) + m.mask = 1 + } else { + m.mask = m.mask << 1 + } + m.index++ +} + // heapBitsForAddr returns the heapBits for the address addr. // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). // @@ -440,28 +457,13 @@ func (h heapBits) bits() uint32 { return uint32(*h.bitp) >> (h.shift & 31) } -// isMarked reports whether the heap bits have the marked bit set. -// h must describe the initial word of the object. -func (h heapBits) isMarked() bool { +// morePointers returns true if this word and all remaining words in this object +// are scalars. +// h must not describe the first or second word of the object. +func (h heapBits) morePointers() bool { return *h.bitp&(bitMarked<= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) { + if doCall { f(base + i*sys.PtrSize) } - if x&(bitMarked<= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) { + if doCall { f(base + (i+1)*sys.PtrSize) } - if x&(bitMarked<<(2*heapBitsShift)) != 0 { - x &^= bitMarked << (2 * heapBitsShift) - } else { + if cl != 0 { + nfree++ + } + } + mbits.advance() + if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) { + if doCall { f(base + (i+2)*sys.PtrSize) } - if x&(bitMarked<<(3*heapBitsShift)) != 0 { - x &^= bitMarked << (3 * heapBitsShift) - } else { + if cl != 0 { + nfree++ + } + } + mbits.advance() + if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) { + if doCall { f(base + (i+3)*sys.PtrSize) } - *bitp = uint8(x) - bitp = subtract1(bitp) + if cl != 0 { + nfree++ + } } + mbits.advance() + } + return +} - case size%(4*sys.PtrSize) == 0: - // Mark bit is in first word of each object. - // Each object starts at bit 0 of a heap bitmap byte. 
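// The markBitsForAddr change above replaces the division by s.elemsize with
// a precomputed multiply-and-shift ("division by multiplication"). The
// standalone program below is a sketch of that technique for one assumed
// size class; the constants are computed locally and are not the runtime's
// divMul/divShift values.
package main

import "fmt"

func main() {
	const elemSize = 48       // assumed object size in bytes
	const spanBytes = 8 << 10 // assumed 8KB span
	const shift = 32
	// Reciprocal of elemSize, rounded up so the shifted product never
	// undershoots for offsets within one span.
	magic := uint64(1)<<shift/elemSize + 1

	for off := uint64(0); off < spanBytes; off++ {
		if off*magic>>shift != off/elemSize {
			fmt.Println("mismatch at offset", off)
			return
		}
	}
	fmt.Println("multiply-and-shift matches division for every offset in the span")
}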
- bitp := h.bitp - step := size / heapBitmapScale - for i := uintptr(0); i < n; i++ { - x := uint32(*bitp) - if x&bitMarked != 0 { - x &^= bitMarked - } else { - x = 0 - f(base + i*size) +func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool { + mByte := *m.bytep + for { + for mByte == 0xff { + if m.index >= maxIndex { + return false } - *bitp = uint8(x) - bitp = subtractb(bitp, step) + m.index = (m.index + 8) &^ (8 - 1) + m.mask = 1 + m.bytep = add1(m.bytep) + mByte = *m.bytep } - - case size%(4*sys.PtrSize) == 2*sys.PtrSize: - // Mark bit is in first word of each object, - // but every other object starts halfway through a heap bitmap byte. - // Unroll loop 2x to handle alternating shift count and step size. - bitp := h.bitp - step := size / heapBitmapScale - var i uintptr - for i = uintptr(0); i < n; i += 2 { - x := uint32(*bitp) - if x&bitMarked != 0 { - x &^= bitMarked - } else { - x &^= bitMarked | bitPointer | (bitMarked|bitPointer)< 2*sys.PtrSize { - x = 0 + if m.index >= maxIndex { + return false + } + for m.index < maxIndex { + if m.mask&mByte == 0 { + if m.index < s.freeindex { + return true + } + if s.allocBits[m.index/8]&m.mask != 0 { + return true } } - *bitp = uint8(x) - if i+1 >= n { + if m.mask == 1<<7 { + m.mask = 1 + m.bytep = add1(m.bytep) + mByte = *m.bytep + m.index++ break - } - bitp = subtractb(bitp, step) - x = uint32(*bitp) - if x&(bitMarked<<(2*heapBitsShift)) != 0 { - x &^= bitMarked << (2 * heapBitsShift) } else { - x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift) - f(base + (i+1)*size) - if size > 2*sys.PtrSize { - *subtract1(bitp) = 0 - } + m.mask = m.mask << 1 + m.index++ } - *bitp = uint8(x) - bitp = subtractb(bitp, step+1) } } + return false +} + +func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) { + twobits := s.markBitsForBase() + for twobits.nextFreed(n, s) { + if doCall { + f(base + twobits.index*size) + } + if cl != 0 { + nfree++ + } + twobits.advance() + } + return } // heapBitsSetType records that the new allocation [x, x+size) @@ -862,7 +892,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // size is sizeof(_defer{}) (at least 6 words) and dataSize may be // arbitrarily larger. // - // The checks for size == ptrSize and size == 2*ptrSize can therefore + // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore // assume that dataSize == size without checking it explicitly. if sys.PtrSize == 8 && size == sys.PtrSize { @@ -902,10 +932,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // (In general the number of instances of typ being allocated is // dataSize/typ.size.) if sys.PtrSize == 4 && dataSize == sys.PtrSize { - // 1 pointer. + // 1 pointer object. On 32-bit machines clear the bit for the + // unused second word. if gcphase == _GCoff { + *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift *h.bitp |= bitPointer << h.shift } else { + atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<>= 2 nb -= 2 // Note: no bitMarker in hb because the first two words don't get markers from us. 
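// Sketch of the freed-object test used by nextFreed/heapBitsSweepSpan above:
// a slot was freed by this GC cycle if its mark bit is clear and it had
// actually been allocated, i.e. it lies below freeindex or its allocBits bit
// is set. Slots that were never handed out are skipped. The bit layout
// (bit i%8 of byte i/8) mirrors markBits; the function itself is
// illustrative, not the runtime's code.
func countFreedSketch(markBits, allocBits []uint8, freeindex, nelems uintptr) int {
	freed := 0
	for i := uintptr(0); i < nelems; i++ {
		mask := uint8(1 << (i % 8))
		marked := markBits[i/8]&mask != 0
		wasAllocated := i < freeindex || allocBits[i/8]&mask != 0
		if !marked && wasAllocated {
			freed++ // unmarked but previously allocated: garbage to reclaim
		}
	}
	return freed
}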
if gcphase == _GCoff { + *hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) } else { + atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer< 2*sys.PtrSize { + *bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift) + } + } else if h.shift == 2 { + *bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) + if size > 2*sys.PtrSize { + bitp = subtract1(bitp) + *bitp &^= bitPointer | bitMarked + } + } else { + throw("Type has unrecognized size") + } + } else { + throw("Type has unrecognized size") + } +} + var debugPtrmask struct { lock mutex data *byte @@ -1424,7 +1506,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. // size the size of the region described by prog, in bytes. -// The resulting bitvector will have no more than size/ptrSize bits. +// The resulting bitvector will have no more than size/sys.PtrSize bits. func progToPointerMask(prog *byte, size uintptr) bitvector { n := (size/sys.PtrSize + 7) / 8 x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] @@ -1560,7 +1642,7 @@ Run: // into a register and use that register for the entire loop // instead of repeatedly reading from memory. // Handling fewer than 8 bits here makes the general loop simpler. - // The cutoff is ptrSize*8 - 7 to guarantee that when we add + // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add // the pattern to a bit buffer holding at most 7 bits (a partial byte) // it will not overflow. src := dst @@ -1855,7 +1937,7 @@ func getgcmask(ep interface{}) (mask []byte) { if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } - if i >= 2*sys.PtrSize && !hbits.isMarked() { + if i >= 2*sys.PtrSize && !hbits.morePointers() { mask = mask[:i/sys.PtrSize] break } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 2230c5c200..424fa0efac 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan { _g_.m.locks++ // Return the current cached span to the central lists. s := c.alloc[sizeclass] - if s.freelist.ptr() != nil { - throw("refill on a nonempty span") + + if uintptr(s.ref) != s.nelems { + throw("refill of span with free space remaining") } + if s != &emptymspan { s.incache = false } @@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan { if s == nil { throw("out of memory") } - if s.freelist.ptr() == nil { - println(s.ref, (s.npages<<_PageShift)/s.elemsize) - throw("empty span") + + if uintptr(s.ref) == s.nelems { + throw("span has no free space") } + c.alloc[sizeclass] = s _g_.m.locks-- return s diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index baca157db9..47d3ae2f81 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -18,7 +18,7 @@ import "runtime/internal/atomic" type mcentral struct { lock mutex sizeclass int32 - nonempty mSpanList // list of spans with a free object + nonempty mSpanList // list of spans with a free object, ie a nonempty free list empty mSpanList // list of spans with no free objects (or cached in an mcache) } @@ -67,7 +67,9 @@ retry: c.empty.insertBack(s) unlock(&c.lock) s.sweep(true) - if s.freelist.ptr() != nil { + freeIndex := s.nextFreeIndex(0) + if freeIndex != s.nelems { + s.freeindex = freeIndex goto havespan } lock(&c.lock) @@ -115,9 +117,6 @@ havespan: // heap_live changed. 
gcController.revise() } - if s.freelist.ptr() == nil { - throw("freelist empty") - } s.incache = true return s } @@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) { // the latest generation. // If preserve=true, don't return the span to heap nor relink in MCentral lists; // caller takes care of it. -func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool { +func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool { if s.incache { - throw("freespan into cached span") + throw("freeSpan given cached span") } - // Add the objects back to s's free list. - wasempty := s.freelist.ptr() == nil - end.ptr().next = s.freelist - s.freelist = start s.ref -= uint16(n) if preserve { @@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p return false } - // s is completely freed, return it to the heap. c.nonempty.remove(s) s.needzero = 1 - s.freelist = 0 unlock(&c.lock) mheap_.freeSpan(s, 0) return true } -// Fetch a new span from the heap and carve into objects for the free list. +// grow allocates a new empty span from the heap and initializes it for c's size class. func (c *mcentral) grow() *mspan { npages := uintptr(class_to_allocnpages[c.sizeclass]) size := uintptr(class_to_size[c.sizeclass]) @@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan { p := uintptr(s.start << _PageShift) s.limit = p + size*n - head := gclinkptr(p) - tail := gclinkptr(p) - // i==0 iteration already done - for i := uintptr(1); i < n; i++ { - p += size - tail.ptr().next = gclinkptr(p) - tail = gclinkptr(p) - } - if s.freelist.ptr() != nil { - throw("freelist not empty") - } - tail.ptr().next = 0 - s.freelist = head + heapBitsForSpan(s.base()).initSpan(s) return s } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 66d61bae1e..fe8a56460b 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork if obj&(sys.PtrSize-1) != 0 { throw("greyobject: obj not pointer-aligned") } - + mbits := span.markBitsForAddr(obj) if useCheckmark { - if !hbits.isMarked() { + if !mbits.isMarked() { printlock() print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") @@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork } } else { // If marked we have nothing to do. - if hbits.isMarked() { + if mbits.isMarked() { return } - hbits.setMarked() + mbits.setMarked() // If this is a noscan object, fast-track it to black // instead of greying it. @@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) { if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen. throw("gcmarknewobject called while doing checkmark") } - heapBitsForAddr(obj).setMarked() + markBitsForAddr(obj).setMarked() atomic.Xadd64(&work.bytesMarked, int64(size)) } diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 31d1a80183..7a1a76cbad 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool { c := _g_.m.mcache freeToHeap := false - // Mark any free objects in this span so we don't collect them. 
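// greyobject above now sets the object's mark bit through markBits, and
// concurrent markers can race on the same byte of gcmarkBits, so the mark
// must be set atomically (the sweep path, which owns the span, uses
// setMarkedNonAtomic instead). The program below sketches the same idea
// portably with a compare-and-swap loop on a uint32 mark word; it is an
// illustration, not the runtime's implementation.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// setBitAtomic sets bit i of *word without losing concurrent updates.
func setBitAtomic(word *uint32, i uint) {
	mask := uint32(1) << (i % 32)
	for {
		old := atomic.LoadUint32(word)
		if old&mask != 0 {
			return // already marked by another goroutine
		}
		if atomic.CompareAndSwapUint32(word, old, old|mask) {
			return
		}
	}
}

func main() {
	var marks uint32
	var wg sync.WaitGroup
	for i := 0; i < 32; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			setBitAtomic(&marks, uint(i)) // each goroutine marks one "object"
		}(i)
	}
	wg.Wait()
	fmt.Printf("mark word: %032b\n", marks) // every bit ends up set
}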
- sstart := uintptr(s.start << _PageShift) - for link := s.freelist; link.ptr() != nil; link = link.ptr().next { - if uintptr(link) < sstart || s.limit <= uintptr(link) { - // Free list is corrupted. - dumpFreeList(s) - throw("free list corrupted") - } - heapBitsForAddr(uintptr(link)).setMarkedNonAtomic() - } + // The allocBits indicate which unmarked objects don't need to be + // processed since they were free at the end of the last GC cycle + // and were not allocated since then. + // If the allocBits index is >= s.freeindex and the bit + // is not marked then the object remains unallocated + // since the last GC. + // This situation is analogous to being on a freelist. // Unlink & free special records for any objects we're about to free. // Two complications here: @@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool { for special != nil { // A finalizer can be set for an inner byte of an object, find object beginning. p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size - hbits := heapBitsForAddr(p) - if !hbits.isMarked() { + mbits := s.markBitsForAddr(p) + if !mbits.isMarked() { // This object is not marked and has at least one special record. // Pass 1: see if it has at least one finalizer. hasFin := false @@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool { for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { if tmp.kind == _KindSpecialFinalizer { // Stop freeing of object if it has a finalizer. - hbits.setMarkedNonAtomic() + mbits.setMarkedNonAtomic() hasFin = true break } @@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool { // This thread owns the span now, so it can manipulate // the block bitmap without atomic operations. - size, n, _ := s.layout() - heapBitsSweepSpan(s.base(), size, n, func(p uintptr) { + nfree = heapBitsSweepSpan(s, func(p uintptr) { // At this point we know that we are looking at garbage object // that needs to be collected. if debug.allocfreetrace != 0 { @@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool { } else if size > sys.PtrSize { *(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0 } - if head.ptr() == nil { - head = gclinkptr(p) - } else { - end.ptr().next = gclinkptr(p) - } - end = gclinkptr(p) - end.ptr().next = gclinkptr(0x0bade5) - nfree++ } }) + wasempty := s.nextFreeIndex(s.freeindex) == s.nelems + + s.freeindex = 0 // reset allocation index to start of span. + + // Swap role of allocBits with gcmarkBits + // Clear gcmarkBits in preparation for next GC + s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits + s.clearGCMarkBits() // prepare for next GC + // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, // because of the potential for a concurrent free/SetFinalizer. // But we need to set it before we make the span available for allocation @@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool { print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") throw("MSpan_Sweep: bad span state after sweep") } + // Serialization point. + // At this point the mark bits are cleared and allocation ready + // to go so release the span. 
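// The "Serialization point" note above: the sweeper publishes the span by
// atomically storing the new sweepgen only after the bit vectors have been
// swapped and cleared, so an allocator that observes the new generation also
// observes the rebuilt allocation state. The program below sketches that
// publish-then-observe pattern with sync/atomic; the type and values are
// invented for the illustration.
package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

type toySpanState struct {
	sweepgen  uint32 // published generation
	freeSlots []int  // state that must be visible before sweepgen advances
}

func main() {
	s := &toySpanState{sweepgen: 1}

	go func() { // sweeper
		s.freeSlots = []int{0, 2, 3}       // rebuild allocation state first...
		atomic.StoreUint32(&s.sweepgen, 2) // ...then publish it
	}()

	// Allocator: wait until the span is published as swept, then use it.
	for atomic.LoadUint32(&s.sweepgen) != 2 {
		runtime.Gosched()
	}
	fmt.Println("span swept; free slots:", s.freeSlots)
}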
atomic.Store(&s.sweepgen, sweepgen) } if nfree > 0 { c.local_nsmallfree[cl] += uintptr(nfree) - res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve) + res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty) // MCentral_FreeSpan updates sweepgen } else if freeToHeap { // Free large span to heap @@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) { throw("spanBytesAlloc underflow") } } - -func dumpFreeList(s *mspan) { - printlock() - print("runtime: free list of span ", s, ":\n") - sstart := uintptr(s.start << _PageShift) - link := s.freelist - for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ { - if i != 0 { - print(" -> ") - } - print(hex(link)) - if link.ptr() == nil { - break - } - if uintptr(link) < sstart || s.limit <= uintptr(link) { - // Bad link. Stop walking before we crash. - print(" (BAD)") - break - } - link = link.ptr().next - } - print("\n") - printunlock() -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index a3d34a360e..d5dde5e72e 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -119,8 +119,7 @@ type mspan struct { start pageID // starting page number npages uintptr // number of pages in span - freelist gclinkptr // list of free objects for _MSpanInUse - stackfreelist gclinkptr // list of free stacks, avoids overloading freelist for _MSpanStack + stackfreelist gclinkptr // list of free stacks, avoids overloading freelist // freeindex is the slot index between 0 and nelems at which to begin scanning // for the next free object in this span. @@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan { // able to map interior pointer to containing span. atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse - s.freelist = 0 s.ref = 0 s.sizeclass = uint8(sizeclass) if sizeclass == 0 { @@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) { span.list = nil span.start = start span.npages = npages - span.freelist = 0 span.ref = 0 span.sizeclass = 0 span.incache = false @@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) { span.speciallock.key = 0 span.specials = nil span.needzero = 0 + span.freeindex = 0 + span.allocBits = &span.markbits1 + span.gcmarkBits = &span.markbits2 + // determine if this is actually needed. It is once / span so it + // isn't expensive. This is to be replaced by an arena + // based system where things can be cleared all at once so + // don't worry about optimizing this. + for i := 0; i < len(span.markbits1); i++ { + span.allocBits[i] = 0 + span.gcmarkBits[i] = 0 + } } func (span *mspan) inList() bool { diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 5e373f1b94..8fd7ef2bcf 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1137,7 +1137,6 @@ func freeStackSpans() { next := s.next if s.ref == 0 { list.remove(s) - s.freelist = 0 s.stackfreelist = 0 mheap_.freeStack(s) } -- 2.48.1
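Taken together, the patch replaces per-span free lists with allocation driven by two bit vectors and a freeindex boundary. The program below is a self-contained toy model of that scheme, assuming invented type, method names, and sizes; only the overall behavior follows the commit message: allocation scans allocBits from freeindex, the GC sets bits in gcmarkBits, and sweep swaps the two vectors and clears the new mark vector.

package main

import "fmt"

// toySpan models the per-span state the patch introduces: an allocation bit
// vector, a mark bit vector for the in-progress GC, and a freeindex boundary.
type toySpan struct {
	nelems     uintptr
	freeindex  uintptr
	allocBits  []uint8
	gcmarkBits []uint8
}

func newToySpan(nelems uintptr) *toySpan {
	nbytes := (nelems + 7) / 8
	return &toySpan{
		nelems:     nelems,
		allocBits:  make([]uint8, nbytes),
		gcmarkBits: make([]uint8, nbytes),
	}
}

// alloc returns the index of the next free slot at or above freeindex, or
// nelems if the span is exhausted. Slots below freeindex are never revisited;
// the boundary only advances until the next sweep resets it.
func (s *toySpan) alloc() uintptr {
	for i := s.freeindex; i < s.nelems; i++ {
		if s.allocBits[i/8]&(1<<(i%8)) == 0 {
			s.freeindex = i + 1
			return i
		}
	}
	s.freeindex = s.nelems
	return s.nelems
}

// mark records that the object in slot i survived this GC cycle.
func (s *toySpan) mark(i uintptr) {
	s.gcmarkBits[i/8] |= 1 << (i % 8)
}

// sweep swaps the vectors: the survivors recorded in gcmarkBits become the
// new allocation state, the old allocation vector is cleared to serve as the
// next cycle's mark vector, and allocation restarts from the base of the span.
func (s *toySpan) sweep() {
	s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
	for i := range s.gcmarkBits {
		s.gcmarkBits[i] = 0
	}
	s.freeindex = 0
}

func main() {
	s := newToySpan(8)
	for i := 0; i < 8; i++ {
		s.alloc() // fill the span: slots 0 through 7
	}
	s.mark(1) // pretend only slots 1 and 5 are still reachable
	s.mark(5)
	s.sweep()
	fmt.Print("free slots after sweep:")
	for i := s.alloc(); i < s.nelems; i = s.alloc() {
		fmt.Print(" ", i) // prints 0 2 3 4 6 7: everything but the survivors
	}
	fmt.Println()
}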