// since the arena starts at address 0.
_MaxMem = 1<<_MHeapMap_TotalBits - 1
+ // memLimitBits is the maximum number of bits in a heap address.
+ //
+ // On 64-bit platforms, we limit this to 48 bits because that
+ // is the maximum supported by Linux across all 64-bit
+ // architectures, with the exception of s390x.
+ // s390x supports full 64-bit addresses, but the allocator
+ // will panic in the unlikely event we exceed 48 bits.
+ //
+ // On 32-bit platforms, we accept the full 32-bit address
+ // space because doing so is cheap.
+ // mips32 only has access to the low 2GB of virtual memory, so
+ // we further limit it to 31 bits.
+ //
+ // The size of the arena index is proportional to
+ // 1<<memLimitBits, so it's important that this not be too
+ // large. 48 bits is about the threshold; above that we would
+ // need to go to a two level arena index.
+ memLimitBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
+
+ // memLimit is one past the highest possible heap pointer value.
+ memLimit = 1 << memLimitBits
+
+ // heapArenaBytes is the size of a heap arena. The heap
+ // consists of mappings of size heapArenaBytes, aligned to
+ // heapArenaBytes. The initial heap mapping is one arena.
+ //
+ // TODO: Right now only the bitmap is divided into separate
+ // arenas, but shortly all of the heap will be.
+ heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
+
+ // heapArenaBitmapBytes is the size of each heap arena's bitmap.
+ heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
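+
+ // For example, with the values above on a 64-bit platform:
+ // heapArenaBytes is 64 MB, each arena's bitmap is
+ // 64 MB / 32 = 2 MB, and the arena index holds
+ // memLimit/heapArenaBytes = 1<<22 entries, or 32 MB of
+ // pointers. On 32-bit platforms the index is at most 1024
+ // entries (4 KB) and each arena bitmap is 256 KB.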
+
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
testdefersizes()
+ if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+ // heapBits expects modular arithmetic on bitmap
+ // addresses to work.
+ throw("heapArenaBitmapBytes not a power of 2")
+ }
+
// Copy class sizes out for statistics table.
for i := range class_to_size {
memstats.by_size[i].size = uint32(class_to_size[i])
// The spans array holds one *mspan per _PageSize of arena.
var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize
spansSize = round(spansSize, _PageSize)
- // The bitmap holds 2 bits per word of arena.
- var bitmapSize uintptr = (_MaxMem + 1) / (sys.PtrSize * 8 / 2)
- bitmapSize = round(bitmapSize, _PageSize)
// Set up the allocation arena, a contiguous area of memory where
// allocated data will be found.
// not collecting memory because some non-pointer block of memory
// had a bit pattern that matched a memory address.
//
- // Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
- // but it hardly matters: e0 00 is not valid UTF-8 either.
- //
// If this fails we fall back to the 32 bit memory mechanism
//
// However, on arm64, we ignore all this advice above and slam the
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
arenaSize := round(_MaxMem, _PageSize)
- pSize = bitmapSize + spansSize + arenaSize + _PageSize
+ pSize = spansSize + arenaSize + _PageSize
for i := 0; i <= 0x7f; i++ {
switch {
case GOARCH == "arm64" && GOOS == "darwin":
// away from the running binary image and then round up
// to a MB boundary.
p = round(firstmoduledata.end+(1<<18), 1<<20)
- pSize = bitmapSize + spansSize + arenaSize + _PageSize
+ pSize = spansSize + arenaSize + _PageSize
if p <= procBrk && procBrk < p+pSize {
// Move the start above the brk,
// leaving some room for future brk
spansStart := p1
p1 += spansSize
- mheap_.bitmap_start = p1
- p1 += bitmapSize
if sys.PtrSize == 4 {
// Set arena_start such that we can accept memory
// reservations located anywhere in the 4GB virtual space.
mheap_.arena_alloc = p1
mheap_.arena_reserved = reserved
- // Pre-compute the value heapBitsForAddr can use to directly
- // map a heap address to a bitmap address. The obvious
- // computation is:
- //
- // bitp = bitmap_start + (addr - arena_start)/ptrSize/4
- //
- // We can shuffle this to
- //
- // bitp = (bitmap_start - arena_start/ptrSize/4) + addr/ptrSize/4
- //
- // bitmap_delta is the value of the first term.
- mheap_.bitmap_delta = mheap_.bitmap_start - mheap_.arena_start/heapBitmapScale
-
if mheap_.arena_start&(_PageSize-1) != 0 {
- println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
+ println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(_PageSize), "start", hex(mheap_.arena_start))
throw("misrounded allocation in mallocinit")
}
+ // Map the arena index. Most of this will never be touched.
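+ // On 64-bit the index is memLimit/heapArenaBytes = 1<<22
+ // pointers (32 MB).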
+ var untracked uint64
+ mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
+ if mheap_.arenas == nil {
+ throw("failed to allocate arena index")
+ }
+
// Initialize the rest of the allocator.
mheap_.init(spansStart, spansSize)
_g_ := getg()
//
// Heap bitmap
//
-// The allocated heap comes from a subset of the memory in the range [start, used),
-// where start == mheap_.arena_start and used == mheap_.arena_used.
-// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
-// stored in bytes indexed forward in memory from bitmap_start.
-// That is, the byte at address bitmap holds the 2-bit entries for the
-// four words start through start+3*ptrSize, the byte at
-// bitmap_start+1 holds the entries for start+4*ptrSize through
-// start+7*ptrSize, and so on.
+// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
+// stored in the heapArena metadata backing each heap arena.
+// That is, if ha is the heapArena for the arena starting at start,
+// then ha.bitmap[0] holds the 2-bit entries for the four words start
+// through start+3*ptrSize, ha.bitmap[1] holds the entries for
+// start+4*ptrSize through start+7*ptrSize, and so on.
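+//
+// In other words, the bits for a heap address p live in
+// mheap_.arenas[p/heapArenaBytes].bitmap[(p/ptrSize/4)%heapArenaBitmapBytes],
+// in the 2-bit entry selected by (p/ptrSize)%4 (see heapBitsForAddr).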
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
bitPointer = 1 << 0
bitScan = 1 << 4
- heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
- heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
- wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte
+ heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
+ wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte
// all scan/pointer bits in a byte
bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
-// mapBits maps any additional bitmap memory needed for the new arena memory.
-//
-// Don't call this directly. Call mheap.setArenaUsed.
-//
-//go:nowritebarrier
-func (h *mheap) mapBits(arena_used uintptr) {
- // Caller has added extra mappings to the arena.
- // Add extra mappings of bitmap words as needed.
- // We allocate extra bitmap pieces in chunks of bitmapChunk.
- const bitmapChunk = 8192
-
- n := (arena_used - mheap_.arena_start) / heapBitmapScale
- n = round(n, bitmapChunk)
- n = round(n, physPageSize)
- if h.bitmap_mapped >= n {
- return
- }
-
- sysMap(unsafe.Pointer(h.bitmap_start+h.bitmap_mapped), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
- h.bitmap_mapped = n
-}
-
// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
type heapBits struct {
bitp *uint8
shift uint32
+ arena uint32 // Index of heap arena containing bitp
+ last *uint8 // Last byte of the arena's bitmap
}
+// Make the compiler check that heapBits.arena is large enough to hold
+// the maximum arena index.
+var _ = heapBits{arena: memLimit / heapArenaBytes}
+
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
}
// heapBitsForAddr returns the heapBits for the address addr.
-// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
+// The caller must ensure addr is in an allocated span.
+// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
off := addr / sys.PtrSize
- return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap_delta + off/4)), uint32(off & 3)}
+ arena := addr / heapArenaBytes
+ ha := mheap_.arenas[arena]
+ // The compiler uses a load for nil checking ha, but in this
+ // case we'll almost never hit that cache line again, so it
+ // makes more sense to do a value check.
+ if ha == nil {
+ // addr is not in the heap. Crash without inhibiting inlining.
+ _ = *ha
+ }
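+ // Each bitmap byte covers 4 heap words, so off/4 is the
+ // global bitmap byte index; the modulus reduces it to an
+ // index within this arena's bitmap.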
+ bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
+ last := &ha.bitmap[len(ha.bitmap)-1]
+ return heapBits{bitp, uint32(off & 3), uint32(arena), last}
}
// heapBitsForSpan returns the heapBits for the span base address base.
//go:nosplit
func (h heapBits) next() heapBits {
if h.shift < 3*heapBitsShift {
- return heapBits{h.bitp, h.shift + heapBitsShift}
+ h.shift += heapBitsShift
+ } else if h.bitp != h.last {
+ h.bitp, h.shift = add1(h.bitp), 0
+ } else {
+ // Move to the next arena.
+ h.arena++
+ a := mheap_.arenas[h.arena]
+ if a == nil {
+ // We just passed the end of the object, which
+ // was also the end of the heap. Poison h. It
+ // should never be dereferenced at this point.
+ h.bitp, h.last = nil, nil
+ } else {
+ h.bitp, h.shift = &a.bitmap[0], 0
+ h.last = &a.bitmap[len(a.bitmap)-1]
+ }
}
- return heapBits{add1(h.bitp), 0}
+ return h
}
// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
// bits returns the heap bits for the current word.
+//go:nosplit
func (h heapBits) forward(n uintptr) heapBits {
n += uintptr(h.shift) / heapBitsShift
- return heapBits{addb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
+ nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
+ h.shift = uint32(n%4) * heapBitsShift
+ if nbitp <= uintptr(unsafe.Pointer(h.last)) {
+ h.bitp = (*uint8)(unsafe.Pointer(nbitp))
+ return h
+ }
+
+ // We're in a new heap arena.
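+ // past is the number of bitmap bytes nbitp lands beyond the
+ // end of the current arena's bitmap; every whole multiple of
+ // heapArenaBitmapBytes in it skips another arena.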
+ past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
+ h.arena += 1 + uint32(past/heapArenaBitmapBytes)
+ a := mheap_.arenas[h.arena]
+ if a == nil {
+ h.bitp, h.last = nil, nil
+ } else {
+ h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
+ h.last = &a.bitmap[len(a.bitmap)-1]
+ }
+ return h
}
// forwardOrBoundary is like forward, but stops at boundaries between
// contiguous sections of the bitmap. It returns the number of words
// advanced over, which will be <= n.
func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
- // The bitmap is contiguous right now, so this is just forward.
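+ // Each bitmap byte describes 4 heap words, so the current
+ // arena's bitmap can carry us at most maxn more words before
+ // hitting an arena boundary.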
+ maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
+ if n > maxn {
+ n = maxn
+ }
return h.forward(n), n
}
// This is a lot of lines of code, but it compiles into relatively few
// machine instructions.
+ outOfPlace := false
+ if (x+size-1)/heapArenaBytes != uintptr(h.arena) {
+ // This object spans heap arenas, so the bitmap may be
+ // discontiguous. Unroll it into the object instead
+ // and then copy it out.
+ outOfPlace = true
+ h.bitp = (*uint8)(unsafe.Pointer(x))
+ h.last = nil
+ }
+
var (
// Ptrmask input.
p *byte // last ptrmask byte read
}
ptrmask = debugPtrmask.data
runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
- goto Phase4
}
- return
+ goto Phase4
}
// Note about sizes:
nw = 2
}
- // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
+ // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
// The leading byte is special because it contains the bits for word 1,
// which does not have the scan bit set.
// The leading half-byte is special because it's a half a byte,
}
Phase4:
- // Phase 4: all done, but perhaps double check.
+ // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
+ if outOfPlace {
+ // TODO: We could probably make this faster by
+ // handling [x+dataSize, x+size) specially.
+ h := heapBitsForAddr(x)
+ // cnw is the number of heap words, or bit pairs
+ // remaining (like nw above).
+ cnw := size / sys.PtrSize
+ src := (*uint8)(unsafe.Pointer(x))
+ // We know the first and last byte of the bitmap are
+ // not the same, but it's still possible for small
+ // objects to span arenas, so the bitmap may share
+ // bytes with neighboring objects.
+ //
+ // Handle the first byte specially if it's shared. See
+ // Phase 1 for why this is the only special case we need.
+ if doubleCheck {
+ if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) {
+ print("x=", x, " size=", size, " cnw=", h.shift, "\n")
+ throw("bad start shift")
+ }
+ }
+ if sys.PtrSize == 8 && h.shift == 2 {
+ *hbitp = *hbitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
+ h = h.next().next()
+ cnw -= 2
+ src = addb(src, 1)
+ }
+ // We're now byte aligned. Copy out to per-arena
+ // bitmaps until the last byte (which may again be
+ // partial).
+ for cnw >= 4 {
+ hNext, words := h.forwardOrBoundary(cnw)
+
+ // n is the number of bitmap bytes to copy.
+ n := words / 4
+ memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
+ cnw -= words
+ h = hNext
+ src = addb(src, n)
+ }
+ // Handle the last byte if it's shared.
+ if cnw == 2 {
+ *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src
+ src = addb(src, 1)
+ h = h.next().next()
+ }
+ if doubleCheck {
+ if uintptr(unsafe.Pointer(src)) > x+size {
+ throw("copy exceeded object size")
+ }
+ if !(cnw == 0 || cnw == 2) {
+ print("x=", x, " size=", size, " cnw=", cnw, "\n")
+ throw("bad number of remaining words")
+ }
+ // Set up hbitp so doubleCheck code below can check it.
+ hbitp = h.bitp
+ }
+ // Zero the object where we wrote the bitmap.
+ memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
+ }
+
+ // Double check the whole bitmap.
if doubleCheck {
- end := heapBitsForAddr(x + size)
+ // x+size may not point to the heap, so back up one
+ // word and then call next().
+ end := heapBitsForAddr(x + size - sys.PtrSize).next()
+ if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
+ // The unrolling code above walks hbitp just
+ // past the bitmap without moving to the next
+ // arena. Synthesize this for end.bitp.
+ end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
+ end.arena--
+ end.last = nil
+ }
if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
if have != want {
println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
- print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
+ print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
h0 := heapBitsForAddr(x)
print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
}
endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
- endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/heapBitmapScale))
+ endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte))
memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
}
nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
// range of addresses we might see in the heap
- bitmap_start uintptr // Points to first byte of bitmap
- bitmap_mapped uintptr
- bitmap_delta uintptr // Used to map heap address to bitmap address
// The arena_* fields indicate the addresses of the Go heap.
//
// here and *must* clobber it to use it.
arena_reserved bool
+ // arenas is the heap arena index. arenas[va/heapArenaBytes]
+ // points to the metadata for the heap arena containing va.
+ //
+ // For regions of the address space that are not backed by the
+ // Go heap, the arena index contains nil.
+ //
+ // Modifications are protected by mheap_.lock. Reads can be
+ // performed without locking; however, a given entry can
+ // transition from nil to non-nil at any time when the lock
+ // isn't held. (Entries never transition back to nil.)
+ //
+ // This structure is fully mapped by mallocinit, so it's safe
+ // to probe any index.
+ arenas *[memLimit / heapArenaBytes]*heapArena
+
//_ uint32 // ensure 64-bit alignment
// central free lists for small size classes.
var mheap_ mheap
+// A heapArena stores metadata for a heap arena. heapArenas are stored
+// outside of the Go heap and accessed via the mheap_.arenas index.
+//
+// This gets allocated directly from the OS, so ideally it should be a
+// multiple of the system page size. For example, avoid adding small
+// fields.
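+// (With only the bitmap, a heapArena is 2 MB on 64-bit and
+// 256 KB on 32-bit, which is a multiple of every supported
+// page size.)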
+//
+//go:notinheap
+type heapArena struct {
+ // bitmap stores the pointer/scalar bitmap for the words in
+ // this arena. See mbitmap.go for a description. Use the
+ // heapBits type to access this.
+ bitmap [heapArenaBitmapBytes]byte
+
+ // TODO: Also store the spans map here.
+}
+
// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
// avoids faults when other threads try access these regions immediately
// after observing the change to arena_used.
- // Map the bitmap.
- h.mapBits(arena_used)
+ // Allocate heap arena metadata.
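+ // ri indexes the arena frames newly covered by growing the
+ // heap from h.arena_used to arena_used; frames that already
+ // have metadata are skipped.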
+ for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ {
+ if h.arenas[ri] != nil {
+ continue
+ }
+ r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys))
+ if r == nil {
+ throw("runtime: out of memory allocating heap arena metadata")
+ }
+ // Store atomically just in case an object from the
+ // new heap arena becomes visible before the heap lock
+ // is released (which shouldn't happen, but there's
+ // little downside to this).
+ atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
+ }
// Map spans array.
h.mapSpans(arena_used)