[dev.garbage] runtime: allocate directly from GC mark bits

author Rick Hudson <rlh@golang.org>

Thu, 11 Feb 2016 18:57:58 +0000 (13:57 -0500)

committer Rick Hudson <rlh@golang.org>

Wed, 27 Apr 2016 21:54:47 +0000 (21:54 +0000)
author Rick Hudson <rlh@golang.org>
Thu, 11 Feb 2016 18:57:58 +0000 (13:57 -0500)
committer Rick Hudson <rlh@golang.org>
Wed, 27 Apr 2016 21:54:47 +0000 (21:54 +0000)
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go

index e6a41f7f97a2be9988e4990541cf706fdfe45620..96dd6ff867ec50fcd1190cc948244fc4bb44d0db 100644 (file)
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -472,9 +472,13 @@ func dumpobjs() {
                 if n > uintptr(len(freemark)) {
                         throw("freemark array doesn't have enough entries")
                 }
-               for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
-                       freemark[(uintptr(l)-p)/size] = true
+
+               for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
+                       if s.isFree(freeIndex) {
+                               freemark[freeIndex] = true
+                       }
                 }
+
                 for j := uintptr(0); j < n; j, p = j+1, p+size {
                         if freemark[j] {
                                 freemark[j] = false
@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
         i := uintptr(0)
         hbits := heapBitsForAddr(p)
         for ; i < nptr; i++ {
-               if i >= 2 && !hbits.isMarked() {
+               if i >= 2 && !hbits.morePointers() {
                         break // end of object
                 }
                 if hbits.isPointer() {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go

index 528a5b73bad7cd26cdcc82038bf83a08ac6d450f..e635682caec5ec017a62cac571f45b23479adb8a 100644 (file)
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -502,23 +502,34 @@ const (
  // weight allocation. If it is a heavy weight allocation the caller must
  // determine whether a new GC cycle needs to be started or if the GC is active
  // whether this goroutine needs to assist the GC.
-// https://golang.org/cl/5350 motivates why this routine should preform a
-// prefetch.
  func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
         s := c.alloc[sizeclass]
-       v = s.freelist
-       if v.ptr() == nil {
+       shouldhelpgc = false
+       freeIndex := s.nextFreeIndex(s.freeindex)
+
+       if freeIndex == s.nelems {
+               // The span is full.
+               if uintptr(s.ref) != s.nelems {
+                       throw("s.ref != s.nelems && freeIndex == s.nelems")
+               }
                 systemstack(func() {
                         c.refill(int32(sizeclass))
                 })
                 shouldhelpgc = true
                 s = c.alloc[sizeclass]
-               v = s.freelist
+               freeIndex = s.nextFreeIndex(s.freeindex)
+       }
+       if freeIndex >= s.nelems {
+               throw("freeIndex is not valid")
         }
-       s.freelist = v.ptr().next
+
+       v = gclinkptr(freeIndex*s.elemsize + s.base())
+       // Advance the freeIndex.
+       s.freeindex = freeIndex + 1
         s.ref++
-       // prefetchnta offers best performance, see change list message.
-       prefetchnta(uintptr(v.ptr().next))
+       if uintptr(s.ref) > s.nelems {
+               throw("s.ref > s.nelems")
+       }
         return
  }
  
@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                         v, shouldhelpgc = c.nextFree(sizeclass)
                         x = unsafe.Pointer(v)
                         if flags&flagNoZero == 0 {
-                               v.ptr().next = 0
-                               if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
-                                       memclr(unsafe.Pointer(v), size)
-                               }
+                               memclr(unsafe.Pointer(v), size)
+                               // TODO:(rlh) Only clear if object is not known to be zeroed.
                         }
                 }
         } else {
@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                 systemstack(func() {
                         s = largeAlloc(size, flags)
                 })
+               s.freeindex = 1
                 x = unsafe.Pointer(uintptr(s.start << pageShift))
                 size = s.elemsize
         }
  
         if flags&flagNoScan != 0 {
-               // All objects are pre-marked as noscan. Nothing to do.
+               heapBitsSetTypeNoScan(uintptr(x), size)
         } else {
                 // If allocating a defer+arg block, now that we've picked a malloc size
                 // large enough to hold everything, cut the "asked for" size down to
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go

index a78efdc034ccf154c3d1c88a46dd413a104ea441..10446fee42611e60fe4c59225b3923a2feb25295 100644 (file)
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -24,7 +24,7 @@
  // In each 2-bit entry, the lower bit holds the same information as in the 1-bit
  // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
  // The meaning of the high bit depends on the position of the word being described
-// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
+// in its allocated object. In the first word, the high bit is unused.
  // In the second word, the high bit is the GC ``checkmarked'' bit (see below).
  // In the third and later words, the high bit indicates that the object is still
  // being described. In these words, if a bit pair with a high bit 0 is encountered,
@@ -33,12 +33,13 @@
  // in the object are uninteresting to the garbage collector.
  //
  // The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
+// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
+// bits.
  // This form allows a copy from the 1-bit to the 4-bit form to keep the
  // pointer bits contiguous, instead of having to space them out.
  //
  // The code makes use of the fact that the zero value for a heap bitmap
-// has no live pointer bit set and is (depending on position), not marked,
+// has no live pointer bit set and is (depending on position), not used,
  // not checkmarked, and is the dead encoding.
  // These properties must be preserved when modifying the encoding.
  //
@@ -63,6 +64,7 @@
  // It is still used in general, except in checkmark the type bit is repurposed
  // as the checkmark bit and then reinitialized (to 1) as the type bit when
  // finished.
+//
  
  package runtime
  
@@ -254,16 +256,20 @@ func markBitsForAddr(p uintptr) markBits {
  
  func (s *mspan) markBitsForAddr(p uintptr) markBits {
         byteOffset := p - s.base()
-       markBitIndex := byteOffset / s.elemsize // TODO if hot spot use fancy divide....
-       return s.markBitsForIndex(markBitIndex)
-}
-
-func (s *mspan) markBitsForIndex(markBitIndex uintptr) markBits {
+       markBitIndex := uintptr(0)
+       if byteOffset != 0 {
+               // markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication
+               markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+       }
         whichByte := markBitIndex / 8
         whichBit := markBitIndex % 8
         return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex}
  }
  
+func (s *mspan) markBitsForBase() markBits {
+       return markBits{&s.gcmarkBits[0], uint8(1), 0}
+}
+
  // isMarked reports whether mark bit m is set.
  func (m markBits) isMarked() bool {
         return *m.bytep&m.mask != 0
@@ -307,6 +313,17 @@ func markBitsForSpan(base uintptr) (mbits markBits) {
         return mbits
  }
  
+// advance advances the markBits to the next object in the span.
+func (m *markBits) advance() {
+       if m.mask == 1<<7 {
+               m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
+               m.mask = 1
+       } else {
+               m.mask = m.mask << 1
+       }
+       m.index++
+}
+
  // heapBitsForAddr returns the heapBits for the address addr.
  // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
  //
@@ -440,28 +457,13 @@ func (h heapBits) bits() uint32 {
         return uint32(*h.bitp) >> (h.shift & 31)
  }
  
-// isMarked reports whether the heap bits have the marked bit set.
-// h must describe the initial word of the object.
-func (h heapBits) isMarked() bool {
+// morePointers returns true if this word and all remaining words in this object
+// are scalars.
+// h must not describe the first or second word of the object.
+func (h heapBits) morePointers() bool {
         return *h.bitp&(bitMarked<<h.shift) != 0
  }
  
-// setMarked sets the marked bit in the heap bits, atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarked() {
-       // Each byte of GC bitmap holds info for four words.
-       // Might be racing with other updates, so use atomic update always.
-       // We used to be clever here and use a non-atomic update in certain
-       // cases, but it's not worth the risk.
-       atomic.Or8(h.bitp, bitMarked<<h.shift)
-}
-
-// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarkedNonAtomic() {
-       *h.bitp |= bitMarked << h.shift
-}
-
  // isPointer reports whether the heap bits describe a pointer word.
  // h must describe the initial word of the object.
  //
@@ -733,106 +735,134 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
         }
  }
  
-// heapBitsSweepSpan coordinates the sweeping of a span by reading
-// and updating the corresponding heap bitmap entries.
-// For each free object in the span, heapBitsSweepSpan sets the type
-// bits for the first four words (less for smaller objects) to scalar/dead
-// and then calls f(p), where p is the object's base address.
-// f is expected to add the object to a free list.
-// For non-free objects, heapBitsSweepSpan turns off the marked bit.
-func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
+// heapBitsSweepSpan coordinates the sweeping of a span and inspects
+// each freed object. If objects are being traced or if msan is enabled
+// then heapBitsSweepSpan calls f(p), where p is the object's base address.
+// When not tracing and msan is not enabled heapBitsSweepSpan is lightweight.
+// heapBitsSweepSpan never alters the pointer/scalar heapBit maps. HeapBit map
+// maintenance is the responsibility of the allocation routines.
+// TODO:(rlh) Deal with the checkmark bits but moving them
+// out of heap bitmap thus enabling bulk clearing.
+func heapBitsSweepSpan(s *mspan, f func(uintptr)) (nfree int) {
+       base := s.base()
+       size := s.elemsize
+       n := s.nelems
+       cl := s.sizeclass
+       doCall := debug.allocfreetrace != 0 || msanenabled || cl == 0
+
         h := heapBitsForSpan(base)
         switch {
         default:
                 throw("heapBitsSweepSpan")
         case sys.PtrSize == 8 && size == sys.PtrSize:
-               // Consider mark bits in all four 2-bit entries of each bitmap byte.
-               bitp := h.bitp
-               for i := uintptr(0); i < n; i += 4 {
-                       x := uint32(*bitp)
-                       // Note that unlike the other size cases, we leave the pointer bits set here.
-                       // These are initialized during initSpan when the span is created and left
-                       // in place the whole time the span is used for pointer-sized objects.
-                       // That lets heapBitsSetType avoid an atomic update to set the pointer bit
-                       // during allocation.
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
+               nfree = heapBitsSweep8BitPtrs(h, s, base, n, cl, doCall, f)
+       case size%(4*sys.PtrSize) == 0:
+               nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+       case size%(4*sys.PtrSize) == 2*sys.PtrSize:
+               nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+       }
+       return
+}
+
+func heapBitsSweep8BitPtrs(h heapBits, s *mspan, base, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+       mbits := s.markBitsForBase()
+       for i := uintptr(0); i < n; i += 4 {
+               // Note that unlike the other size cases, we leave the pointer bits set here.
+               // These are initialized during initSpan when the span is created and left
+               // in place the whole time the span is used for pointer-sized objects.
+               // That lets heapBitsSetType avoid an atomic update to set the pointer bit
+               // during allocation.
+               if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+                       if doCall {
                                 f(base + i*sys.PtrSize)
                         }
-                       if x&(bitMarked<<heapBitsShift) != 0 {
-                               x &^= bitMarked << heapBitsShift
-                       } else {
+                       if cl != 0 {
+                               nfree++
+                       }
+               }
+               mbits.advance()
+               if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+                       if doCall {
                                 f(base + (i+1)*sys.PtrSize)
                         }
-                       if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (2 * heapBitsShift)
-                       } else {
+                       if cl != 0 {
+                               nfree++
+                       }
+               }
+               mbits.advance()
+               if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+                       if doCall {
                                 f(base + (i+2)*sys.PtrSize)
                         }
-                       if x&(bitMarked<<(3*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (3 * heapBitsShift)
-                       } else {
+                       if cl != 0 {
+                               nfree++
+                       }
+               }
+               mbits.advance()
+               if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+                       if doCall {
                                 f(base + (i+3)*sys.PtrSize)
                         }
-                       *bitp = uint8(x)
-                       bitp = subtract1(bitp)
+                       if cl != 0 {
+                               nfree++
+                       }
                 }
+               mbits.advance()
+       }
+       return
+}
  
-       case size%(4*sys.PtrSize) == 0:
-               // Mark bit is in first word of each object.
-               // Each object starts at bit 0 of a heap bitmap byte.
-               bitp := h.bitp
-               step := size / heapBitmapScale
-               for i := uintptr(0); i < n; i++ {
-                       x := uint32(*bitp)
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
-                               x = 0
-                               f(base + i*size)
+func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool {
+       mByte := *m.bytep
+       for {
+               for mByte == 0xff {
+                       if m.index >= maxIndex {
+                               return false
                         }
-                       *bitp = uint8(x)
-                       bitp = subtractb(bitp, step)
+                       m.index = (m.index + 8) &^ (8 - 1)
+                       m.mask = 1
+                       m.bytep = add1(m.bytep)
+                       mByte = *m.bytep
                 }
-
-       case size%(4*sys.PtrSize) == 2*sys.PtrSize:
-               // Mark bit is in first word of each object,
-               // but every other object starts halfway through a heap bitmap byte.
-               // Unroll loop 2x to handle alternating shift count and step size.
-               bitp := h.bitp
-               step := size / heapBitmapScale
-               var i uintptr
-               for i = uintptr(0); i < n; i += 2 {
-                       x := uint32(*bitp)
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
-                               x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
-                               f(base + i*size)
-                               if size > 2*sys.PtrSize {
-                                       x = 0
+               if m.index >= maxIndex {
+                       return false
+               }
+               for m.index < maxIndex {
+                       if m.mask&mByte == 0 {
+                               if m.index < s.freeindex {
+                                       return true
+                               }
+                               if s.allocBits[m.index/8]&m.mask != 0 {
+                                       return true
                                 }
                         }
-                       *bitp = uint8(x)
-                       if i+1 >= n {
+                       if m.mask == 1<<7 {
+                               m.mask = 1
+                               m.bytep = add1(m.bytep)
+                               mByte = *m.bytep
+                               m.index++
                                 break
-                       }
-                       bitp = subtractb(bitp, step)
-                       x = uint32(*bitp)
-                       if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (2 * heapBitsShift)
                         } else {
-                               x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
-                               f(base + (i+1)*size)
-                               if size > 2*sys.PtrSize {
-                                       *subtract1(bitp) = 0
-                               }
+                               m.mask = m.mask << 1
+                               m.index++
                         }
-                       *bitp = uint8(x)
-                       bitp = subtractb(bitp, step+1)
                 }
         }
+       return false
+}
+
+func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+       twobits := s.markBitsForBase()
+       for twobits.nextFreed(n, s) {
+               if doCall {
+                       f(base + twobits.index*size)
+               }
+               if cl != 0 {
+                       nfree++
+               }
+               twobits.advance()
+       }
+       return
  }
  
  // heapBitsSetType records that the new allocation [x, x+size)
@@ -862,7 +892,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
         // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
         // arbitrarily larger.
         //
-       // The checks for size == ptrSize and size == 2*ptrSize can therefore
+       // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
         // assume that dataSize == size without checking it explicitly.
  
         if sys.PtrSize == 8 && size == sys.PtrSize {
@@ -902,10 +932,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         // (In general the number of instances of typ being allocated is
                         // dataSize/typ.size.)
                         if sys.PtrSize == 4 && dataSize == sys.PtrSize {
-                               // 1 pointer.
+                               // 1 pointer object. On 32-bit machines clear the bit for the
+                               // unused second word.
                                 if gcphase == _GCoff {
+                                       *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
                                         *h.bitp |= bitPointer << h.shift
                                 } else {
+                                       atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
                                         atomic.Or8(h.bitp, bitPointer<<h.shift)
                                 }
                         } else {
@@ -918,7 +951,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         }
                         return
                 }
-               // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+               // Otherwise typ.size must be 2*sys.PtrSize,
+               // and typ.kind&kindGCProg == 0.
                 if doubleCheck {
                         if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
                                 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
@@ -928,8 +962,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 b := uint32(*ptrmask)
                 hb := b & 3
                 if gcphase == _GCoff {
+                       // bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
+                       // 110011 is shifted h.shift and complemented.
+                       // This clears out the bits that are about to be
+                       // ored into *h.hbitp in the next instructions.
+                       *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
                         *h.bitp |= uint8(hb << h.shift)
                 } else {
+                       // TODO:(rlh) since the GC is not concurrently setting the
+                       // mark bits in the heap map anymore and malloc
+                       // owns the span we are allocating in why does this have
+                       // to be atomic?
+
+                       atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
                         atomic.Or8(h.bitp, uint8(hb<<h.shift))
                 }
                 return
@@ -1043,8 +1088,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         // Replicate ptrmask to fill entire pbits uintptr.
                         // Doubling and truncating is fewer steps than
                         // iterating by nb each time. (nb could be 1.)
-                       // Since we loaded typ.ptrdata/ptrSize bits
-                       // but are pretending to have typ.size/ptrSize,
+                       // Since we loaded typ.ptrdata/sys.PtrSize bits
+                       // but are pretending to have typ.size/sys.PtrSize,
                         // there might be no replication necessary/possible.
                         pbits = b
                         endnb = nb
@@ -1135,13 +1180,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 // not with its mark bit. Since there is only one allocation
                 // from a given span at a time, we should be able to set
                 // these bits non-atomically. Not worth the risk right now.
-               hb = (b & 3) << (2 * heapBitsShift)
+               hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
                 b >>= 2
                 nb -= 2
                 // Note: no bitMarker in hb because the first two words don't get markers from us.
                 if gcphase == _GCoff {
+                       *hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
                         *hbitp |= uint8(hb)
                 } else {
+                       atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
                         atomic.Or8(hbitp, uint8(hb))
                 }
                 hbitp = subtract1(hbitp)
@@ -1331,6 +1378,41 @@ Phase4:
         }
  }
  
+// heapBitsSetTypeNoScan marks x as noscan. For objects with 1 or 2
+// words set their bitPointers to off (0).
+// All other objects have the first 3 bitPointers set to
+// off (0) and the scan word in the third word
+// also set to off (0).
+func heapBitsSetTypeNoScan(x, size uintptr) {
+       h := heapBitsForAddr(uintptr(x))
+       bitp := h.bitp
+
+       if sys.PtrSize == 8 && size == sys.PtrSize {
+               // If this is truely noScan the tinyAlloc logic should have noticed
+               // and combined such objects.
+               throw("noscan object is too small")
+       } else if size%(4*sys.PtrSize) == 0 {
+               *bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
+       } else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
+               if h.shift == 0 {
+                       *bitp &^= (bitPointer | bitPointer<<heapBitsShift)
+                       if size > 2*sys.PtrSize {
+                               *bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
+                       }
+               } else if h.shift == 2 {
+                       *bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
+                       if size > 2*sys.PtrSize {
+                               bitp = subtract1(bitp)
+                               *bitp &^= bitPointer | bitMarked
+                       }
+               } else {
+                       throw("Type has unrecognized size")
+               }
+       } else {
+               throw("Type has unrecognized size")
+       }
+}
+
  var debugPtrmask struct {
         lock mutex
         data *byte
@@ -1424,7 +1506,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
  
  // progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
  // size the size of the region described by prog, in bytes.
-// The resulting bitvector will have no more than size/ptrSize bits.
+// The resulting bitvector will have no more than size/sys.PtrSize bits.
  func progToPointerMask(prog *byte, size uintptr) bitvector {
         n := (size/sys.PtrSize + 7) / 8
         x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
@@ -1560,7 +1642,7 @@ Run:
                 // into a register and use that register for the entire loop
                 // instead of repeatedly reading from memory.
                 // Handling fewer than 8 bits here makes the general loop simpler.
-               // The cutoff is ptrSize*8 - 7 to guarantee that when we add
+               // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
                 // the pattern to a bit buffer holding at most 7 bits (a partial byte)
                 // it will not overflow.
                 src := dst
@@ -1855,7 +1937,7 @@ func getgcmask(ep interface{}) (mask []byte) {
                         if hbits.isPointer() {
                                 mask[i/sys.PtrSize] = 1
                         }
-                       if i >= 2*sys.PtrSize && !hbits.isMarked() {
+                       if i >= 2*sys.PtrSize && !hbits.morePointers() {
                                 mask = mask[:i/sys.PtrSize]
                                 break
                         }
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go

index 2230c5c200d2cbd0de19001c13597419f0b886d7..424fa0efac7d5510322553206c9679cace2f26d1 100644 (file)
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
         _g_.m.locks++
         // Return the current cached span to the central lists.
         s := c.alloc[sizeclass]
-       if s.freelist.ptr() != nil {
-               throw("refill on a nonempty span")
+
+       if uintptr(s.ref) != s.nelems {
+               throw("refill of span with free space remaining")
         }
+
         if s != &emptymspan {
                 s.incache = false
         }
@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
         if s == nil {
                 throw("out of memory")
         }
-       if s.freelist.ptr() == nil {
-               println(s.ref, (s.npages<<_PageShift)/s.elemsize)
-               throw("empty span")
+
+       if uintptr(s.ref) == s.nelems {
+               throw("span has no free space")
         }
+
         c.alloc[sizeclass] = s
         _g_.m.locks--
         return s
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go

index baca157db972f3b206f59158ce24e51f7d7c1c2b..47d3ae2f81deac32f12e034f2b2a69a8de69efc9 100644 (file)
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
  type mcentral struct {
         lock      mutex
         sizeclass int32
-       nonempty  mSpanList // list of spans with a free object
+       nonempty  mSpanList // list of spans with a free object, ie a nonempty free list
         empty     mSpanList // list of spans with no free objects (or cached in an mcache)
  }
  
@@ -67,7 +67,9 @@ retry:
                         c.empty.insertBack(s)
                         unlock(&c.lock)
                         s.sweep(true)
-                       if s.freelist.ptr() != nil {
+                       freeIndex := s.nextFreeIndex(0)
+                       if freeIndex != s.nelems {
+                               s.freeindex = freeIndex
                                 goto havespan
                         }
                         lock(&c.lock)
@@ -115,9 +117,6 @@ havespan:
                 // heap_live changed.
                 gcController.revise()
         }
-       if s.freelist.ptr() == nil {
-               throw("freelist empty")
-       }
         s.incache = true
         return s
  }
@@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) {
  // the latest generation.
  // If preserve=true, don't return the span to heap nor relink in MCentral lists;
  // caller takes care of it.
-func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
+func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
         if s.incache {
-               throw("freespan into cached span")
+               throw("freeSpan given cached span")
         }
  
-       // Add the objects back to s's free list.
-       wasempty := s.freelist.ptr() == nil
-       end.ptr().next = s.freelist
-       s.freelist = start
         s.ref -= uint16(n)
  
         if preserve {
@@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
                 return false
         }
  
-       // s is completely freed, return it to the heap.
         c.nonempty.remove(s)
         s.needzero = 1
-       s.freelist = 0
         unlock(&c.lock)
         mheap_.freeSpan(s, 0)
         return true
  }
  
-// Fetch a new span from the heap and carve into objects for the free list.
+// grow allocates a new empty span from the heap and initializes it for c's size class.
  func (c *mcentral) grow() *mspan {
         npages := uintptr(class_to_allocnpages[c.sizeclass])
         size := uintptr(class_to_size[c.sizeclass])
@@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan {
  
         p := uintptr(s.start << _PageShift)
         s.limit = p + size*n
-       head := gclinkptr(p)
-       tail := gclinkptr(p)
-       // i==0 iteration already done
-       for i := uintptr(1); i < n; i++ {
-               p += size
-               tail.ptr().next = gclinkptr(p)
-               tail = gclinkptr(p)
-       }
-       if s.freelist.ptr() != nil {
-               throw("freelist not empty")
-       }
-       tail.ptr().next = 0
-       s.freelist = head
+
         heapBitsForSpan(s.base()).initSpan(s)
         return s
  }
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go

index 66d61bae1e81e71d4a145d531f3fd6c03f09062c..fe8a56460ba0eafe6f59d67304865bff9fbac173 100644 (file)
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
         if obj&(sys.PtrSize-1) != 0 {
                 throw("greyobject: obj not pointer-aligned")
         }
-
+       mbits := span.markBitsForAddr(obj)
         if useCheckmark {
-               if !hbits.isMarked() {
+               if !mbits.isMarked() {
                         printlock()
                         print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
                         print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
@@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
                 }
         } else {
                 // If marked we have nothing to do.
-               if hbits.isMarked() {
+               if mbits.isMarked() {
                         return
                 }
-               hbits.setMarked()
+               mbits.setMarked()
  
                 // If this is a noscan object, fast-track it to black
                 // instead of greying it.
@@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) {
         if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
                 throw("gcmarknewobject called while doing checkmark")
         }
-       heapBitsForAddr(obj).setMarked()
+       markBitsForAddr(obj).setMarked()
         atomic.Xadd64(&work.bytesMarked, int64(size))
  }
  
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go

index 31d1a801831790ac1dbbb3f41d9191fe4b4f8fdd..7a1a76cbadbfdd11e0c88c083ff93ee386f01c42 100644 (file)
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool {
         c := _g_.m.mcache
         freeToHeap := false
  
-       // Mark any free objects in this span so we don't collect them.
-       sstart := uintptr(s.start << _PageShift)
-       for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
-               if uintptr(link) < sstart || s.limit <= uintptr(link) {
-                       // Free list is corrupted.
-                       dumpFreeList(s)
-                       throw("free list corrupted")
-               }
-               heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
-       }
+       // The allocBits indicate which unmarked objects don't need to be
+       // processed since they were free at the end of the last GC cycle
+       // and were not allocated since then.
+       // If the allocBits index is >= s.freeindex and the bit
+       // is not marked then the object remains unallocated
+       // since the last GC.
+       // This situation is analogous to being on a freelist.
  
         // Unlink & free special records for any objects we're about to free.
         // Two complications here:
@@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool {
         for special != nil {
                 // A finalizer can be set for an inner byte of an object, find object beginning.
                 p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
-               hbits := heapBitsForAddr(p)
-               if !hbits.isMarked() {
+               mbits := s.markBitsForAddr(p)
+               if !mbits.isMarked() {
                         // This object is not marked and has at least one special record.
                         // Pass 1: see if it has at least one finalizer.
                         hasFin := false
@@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool {
                         for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
                                 if tmp.kind == _KindSpecialFinalizer {
                                         // Stop freeing of object if it has a finalizer.
-                                       hbits.setMarkedNonAtomic()
+                                       mbits.setMarkedNonAtomic()
                                         hasFin = true
                                         break
                                 }
@@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool {
         // This thread owns the span now, so it can manipulate
         // the block bitmap without atomic operations.
  
-       size, n, _ := s.layout()
-       heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
+       nfree = heapBitsSweepSpan(s, func(p uintptr) {
                 // At this point we know that we are looking at garbage object
                 // that needs to be collected.
                 if debug.allocfreetrace != 0 {
@@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool {
                         } else if size > sys.PtrSize {
                                 *(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
                         }
-                       if head.ptr() == nil {
-                               head = gclinkptr(p)
-                       } else {
-                               end.ptr().next = gclinkptr(p)
-                       }
-                       end = gclinkptr(p)
-                       end.ptr().next = gclinkptr(0x0bade5)
-                       nfree++
                 }
         })
  
+       wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
+
+       s.freeindex = 0 // reset allocation index to start of span.
+
+       // Swap role of allocBits with gcmarkBits
+       // Clear gcmarkBits in preparation for next GC
+       s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
+       s.clearGCMarkBits() // prepare for next GC
+
         // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
         // because of the potential for a concurrent free/SetFinalizer.
         // But we need to set it before we make the span available for allocation
@@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool {
                         print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
                         throw("MSpan_Sweep: bad span state after sweep")
                 }
+               // Serialization point.
+               // At this point the mark bits are cleared and allocation ready
+               // to go so release the span.
                 atomic.Store(&s.sweepgen, sweepgen)
         }
         if nfree > 0 {
                 c.local_nsmallfree[cl] += uintptr(nfree)
-               res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
+               res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
                 // MCentral_FreeSpan updates sweepgen
         } else if freeToHeap {
                 // Free large span to heap
@@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
                 throw("spanBytesAlloc underflow")
         }
  }
-
-func dumpFreeList(s *mspan) {
-       printlock()
-       print("runtime: free list of span ", s, ":\n")
-       sstart := uintptr(s.start << _PageShift)
-       link := s.freelist
-       for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
-               if i != 0 {
-                       print(" -> ")
-               }
-               print(hex(link))
-               if link.ptr() == nil {
-                       break
-               }
-               if uintptr(link) < sstart || s.limit <= uintptr(link) {
-                       // Bad link. Stop walking before we crash.
-                       print(" (BAD)")
-                       break
-               }
-               link = link.ptr().next
-       }
-       print("\n")
-       printunlock()
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index a3d34a360ecbb013f87e3a772a7dfc0a926669d9..d5dde5e72e1e3dba43dd3cd2f7c8e863b6d8944b 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -119,8 +119,7 @@ type mspan struct {
  
         start         pageID    // starting page number
         npages        uintptr   // number of pages in span
-       freelist      gclinkptr // list of free objects for _MSpanInUse
-       stackfreelist gclinkptr // list of free stacks, avoids overloading freelist for _MSpanStack
+       stackfreelist gclinkptr // list of free stacks, avoids overloading freelist
  
         // freeindex is the slot index between 0 and nelems at which to begin scanning
         // for the next free object in this span.
@@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
                 // able to map interior pointer to containing span.
                 atomic.Store(&s.sweepgen, h.sweepgen)
                 s.state = _MSpanInUse
-               s.freelist = 0
                 s.ref = 0
                 s.sizeclass = uint8(sizeclass)
                 if sizeclass == 0 {
@@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) {
         span.list = nil
         span.start = start
         span.npages = npages
-       span.freelist = 0
         span.ref = 0
         span.sizeclass = 0
         span.incache = false
@@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) {
         span.speciallock.key = 0
         span.specials = nil
         span.needzero = 0
+       span.freeindex = 0
+       span.allocBits = &span.markbits1
+       span.gcmarkBits = &span.markbits2
+       // determine if this is actually needed. It is once / span so it
+       // isn't expensive. This is to be replaced by an arena
+       // based system where things can be cleared all at once so
+       // don't worry about optimizing this.
+       for i := 0; i < len(span.markbits1); i++ {
+               span.allocBits[i] = 0
+               span.gcmarkBits[i] = 0
+       }
  }
  
  func (span *mspan) inList() bool {
diff --git a/src/runtime/stack.go b/src/runtime/stack.go

index 5e373f1b9402795be74f0ec054a3e93746357984..8fd7ef2bcf62422fe21599666d46e3711e2429c7 100644 (file)
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -1137,7 +1137,6 @@ func freeStackSpans() {
                         next := s.next
                         if s.ref == 0 {
                                 list.remove(s)
-                               s.freelist = 0
                                 s.stackfreelist = 0
                                 mheap_.freeStack(s)
                         }
author	Rick Hudson <rlh@golang.org>
	Thu, 11 Feb 2016 18:57:58 +0000 (13:57 -0500)
committer	Rick Hudson <rlh@golang.org>
	Wed, 27 Apr 2016 21:54:47 +0000 (21:54 +0000)
src/runtime/heapdump.go		patch \| blob \| history
src/runtime/malloc.go		patch \| blob \| history
src/runtime/mbitmap.go		patch \| blob \| history
src/runtime/mcache.go		patch \| blob \| history
src/runtime/mcentral.go		patch \| blob \| history
src/runtime/mgcmark.go		patch \| blob \| history
src/runtime/mgcsweep.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/stack.go		patch \| blob \| history