// typeptrdata returns the length in bytes of the prefix of t
// containing pointer data. Anything after this offset is scalar data.
-func typeptrdata(t *Type) uint64 {
+func typeptrdata(t *Type) int64 {
if !haspointers(t) {
return 0
}
TFUNC,
TCHAN,
TMAP:
- return uint64(Widthptr)
+ return int64(Widthptr)
case TSTRING:
// struct { byte *str; intgo len; }
- return uint64(Widthptr)
+ return int64(Widthptr)
case TINTER:
// struct { Itab *tab; void *data; } or
// struct { Type *type; void *data; }
- return 2 * uint64(Widthptr)
+ return 2 * int64(Widthptr)
case TARRAY:
if Isslice(t) {
// struct { byte *array; uintgo len; uintgo cap; }
- return uint64(Widthptr)
+ return int64(Widthptr)
}
// haspointers already eliminated t.Bound == 0.
- return uint64(t.Bound-1)*uint64(t.Type.Width) + typeptrdata(t.Type)
+ return (t.Bound-1)*t.Type.Width + typeptrdata(t.Type)
case TSTRUCT:
// Find the last field that has pointers.
lastPtrField = t1
}
}
- return uint64(lastPtrField.Width) + typeptrdata(lastPtrField.Type)
+ return lastPtrField.Width + typeptrdata(lastPtrField.Type)
default:
Fatal("typeptrdata: unexpected type, %v", t)
// zero unsafe.Pointer
// }
ot = duintptr(s, ot, uint64(t.Width))
- ot = duintptr(s, ot, typeptrdata(t))
+ ot = duintptr(s, ot, uint64(typeptrdata(t)))
ot = duint32(s, ot, typehash(t))
ot = duint8(s, ot, 0) // unused
}
// Calculate size of the unrolled GC mask.
- nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
-
- size := (nptr + 7) / 8
+ nptr := typeptrdata(t) / int64(Widthptr)
// Decide whether to use unrolled GC mask or GC program.
// We could use a more elaborate condition, but this seems to work well in practice.
- // For small objects GC program can't give significant reduction.
- // While large objects usually contain arrays; and even if it don't
- // the program uses 2-bits per word while mask uses 4-bits per word,
- // so the program is still smaller.
- return size > int64(2*Widthptr)
+ // For small objects, the GC program can't give significant reduction.
+ return nptr > int64(2*Widthptr*8)
}
// Generates GC bitmask (1 bit per word).
return
}
- vec := bvalloc(2 * int32(Widthptr) * 8)
+ vec := bvalloc(int32(2 * Widthptr * 8))
xoffset := int64(0)
onebitwalktype1(t, &xoffset, vec)
- nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
+ nptr := typeptrdata(t) / int64(Widthptr)
for i := int64(0); i < nptr; i++ {
if bvget(vec, int32(i)) == 1 {
gcmask[i/8] |= 1 << (uint(i) % 8)
verifyGCInfo(t, "data eface", &dataEface, infoEface)
verifyGCInfo(t, "data iface", &dataIface, infoIface)
- verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), nonStackInfo(infoScalarPtr))
- verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), nonStackInfo(infoPtrScalar))
- verifyGCInfo(t, "stack BigStruct", new(BigStruct), nonStackInfo(infoBigStruct()))
- verifyGCInfo(t, "stack string", new(string), nonStackInfo(infoString))
- verifyGCInfo(t, "stack slice", new([]string), nonStackInfo(infoSlice))
- verifyGCInfo(t, "stack eface", new(interface{}), nonStackInfo(infoEface))
- verifyGCInfo(t, "stack iface", new(Iface), nonStackInfo(infoIface))
+ verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr)
+ verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar)
+ verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct())
+ verifyGCInfo(t, "stack string", new(string), infoString)
+ verifyGCInfo(t, "stack slice", new([]string), infoSlice)
+ verifyGCInfo(t, "stack eface", new(interface{}), infoEface)
+ verifyGCInfo(t, "stack iface", new(Iface), infoIface)
for i := 0; i < 10; i++ {
- verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), infoPtr10)
- verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr)
- verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), infoScalarPtr4)
- verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), infoPtrScalar)
- verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), infoBigStruct())
- verifyGCInfo(t, "heap string", escape(new(string)), infoString)
- verifyGCInfo(t, "heap eface", escape(new(interface{})), infoEface)
- verifyGCInfo(t, "heap iface", escape(new(Iface)), infoIface)
+ verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10))
+ verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr))
+ verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4))
+ verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), trimDead(infoPtrScalar))
+ verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), trimDead(infoBigStruct()))
+ verifyGCInfo(t, "heap string", escape(new(string)), trimDead(infoString))
+ verifyGCInfo(t, "heap eface", escape(new(interface{})), trimDead(infoEface))
+ verifyGCInfo(t, "heap iface", escape(new(Iface)), trimDead(infoIface))
}
}
}
}
-func nonStackInfo(mask []byte) []byte {
- // typeDead is replaced with typeScalar everywhere except stacks.
- mask1 := make([]byte, len(mask))
- for i, v := range mask {
- if v == typeDead {
- v = typeScalar
- }
- mask1[i] = v
+func trimDead(mask []byte) []byte {
+ for len(mask) > 2 && mask[len(mask)-1] == typeScalar {
+ mask = mask[:len(mask)-1]
}
- return mask1
+ return mask
}
var gcinfoSink interface{}
// but if the start or end of x shares a bitmap byte with an adjacent
// object, the GC marker is racing with updates to those object's mark bits.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
+ const doubleCheck = false // slow but helpful; enable to test modifications to this function
+
// From here till marked label marking the object as allocated
// and storing type info in the GC bitmap.
h := heapBitsForAddr(x)
ptrmask := (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
if typ.kind&kindGCProg != 0 {
- nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+ nptr := typ.ptrdata / ptrSize
masksize := (nptr + 7) / 8
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// In general, one load can supply two bitmap byte writes.
// This is a lot of lines of code, but it compiles into relatively few
// machine instructions.
+
+ // Ptrmask buffer.
var (
p *byte // last ptrmask byte read
b uintptr // ptrmask bits already loaded
- nb uint32 // number of bits in b at next read
+ nb uintptr // number of bits in b at next read
endp *byte // final ptrmask byte to read (then repeat)
- endnb uint32 // number of valid bits in *endp
+ endnb uintptr // number of valid bits in *endp
pbits uintptr // alternate source of bits
)
+ // Note about sizes:
+ //
+ // typ.size is the number of words in the object,
+ // and typ.ptrdata is the number of words in the prefix
+ // of the object that contains pointers. That is, the final
+ // typ.size - typ.ptrdata words contain no pointers.
+ // This allows optimization of a common pattern where
+ // an object has a small header followed by a large scalar
+ // buffer. If we know the pointers are over, we don't have
+ // to scan the buffer's heap bitmap at all.
+ // The 1-bit ptrmasks are sized to contain only bits for
+ // the typ.ptrdata prefix, zero padded out to a full byte
+ // of bitmap. This code sets nw (below) so that heap bitmap
+ // bits are only written for the typ.ptrdata prefix; if there is
+ // more room in the allocated object, the next heap bitmap
+ // entry is a 00, indicating that there are no more pointers
+ // to scan. So only the ptrmask for the ptrdata bytes is needed.
+ //
+ // Replicated copies are not as nice: if there is an array of
+ // objects with scalar tails, all but the last tail does have to
+ // be initialized, because there is no way to say "skip forward".
+ // However, because of the possibility of a repeated type with
+ // size not a multiple of 4 pointers (one heap bitmap byte),
+ // the code already must handle the last ptrmask byte specially
+ // by treating it as containing only the bits for endnb pointers,
+ // where endnb <= 4. We represent large scalar tails that must
+ // be expanded in the replication by setting endnb larger than 4.
+ // This will have the effect of reading many bits out of b,
+ // but once the real bits are shifted out, b will supply as many
+ // zero bits as we try to read, which is exactly what we need.
+
p = ptrmask
if typ.size < dataSize {
// Filling in bits for an array of typ.
// Set up for repetition of ptrmask during main loop.
- if typ.size/ptrSize+7 <= ptrSize*8 {
- // Entire ptrmask + a leftover fragment fits in uintptr.
+		// Note that ptrmask describes only a prefix of the type: the typ.ptrdata bytes that contain pointers.
+ const maxBits = ptrSize*8 - 7
+ if typ.ptrdata/ptrSize <= maxBits {
+ // Entire ptrmask fits in uintptr with room for a byte fragment.
// Load into pbits and never read from ptrmask again.
// This is especially important when the ptrmask has
// fewer than 8 bits in it; otherwise the reload in the middle
// at least 8 bits.
// Accumulate ptrmask into b.
- nb = uint32(typ.size / ptrSize)
- for i := uint32(0); i < nb; i += 8 {
+ // ptrmask is sized to describe only typ.ptrdata, but we record
+ // it as describing typ.size bytes, since all the high bits are zero.
+ nb = typ.ptrdata / ptrSize
+ for i := uintptr(0); i < nb; i += 8 {
b |= uintptr(*p) << i
p = addb(p, 1)
}
+ nb = typ.size / ptrSize
// Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than
// iterating by nb each time. (nb could be 1.)
+ // Since we loaded typ.ptrdata/ptrSize bits
+ // but are pretending to have typ.size/ptrSize,
+ // there might be no replication necessary/possible.
pbits = b
endnb = nb
- for endnb <= ptrSize*8 {
- pbits |= pbits << endnb
- endnb += endnb
+ if nb+nb <= maxBits {
+ for endnb <= ptrSize*8 {
+ pbits |= pbits << endnb
+ endnb += endnb
+ }
+ // Truncate to a multiple of original ptrmask.
+ endnb = maxBits / nb * nb
+ pbits &= 1<<endnb - 1
+ b = pbits
+ nb = endnb
}
- // Truncate to an multiple of original ptrmask.
- endnb = (ptrSize*8 - 7) / nb * nb
- pbits &= 1<<endnb - 1
- b = pbits
- nb = endnb
// Clear p and endp as sentinel for using pbits.
// Checked during Phase 2 loop.
endp = nil
} else {
// Ptrmask is larger. Read it multiple times.
- endp = addb(ptrmask, (typ.size/ptrSize+7)/8-1)
- endnb = uint32(typ.size/ptrSize) % 8
- if endnb == 0 {
- endnb = 8
- }
+ n := (typ.ptrdata/ptrSize+7)/8 - 1
+ endp = addb(ptrmask, n)
+ endnb = typ.size/ptrSize - n*8
}
}
if p != nil {
nb = 8
}
- w := uintptr(0) // number of words processed
- nw := dataSize / ptrSize // number of words to process
+ var w uintptr // words processed
+ var nw uintptr // total number of words to process
+ if typ.size == dataSize {
+ // Single entry: can stop once we reach the non-pointer data.
+ nw = typ.ptrdata / ptrSize
+ } else {
+ // Repeated instances of typ in an array.
+		// Have to process the first count-1 entries in full, plus the typ.ptrdata prefix of the last entry.
+ nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / ptrSize
+ }
+ if nw == 0 {
+ // No pointers! Caller was supposed to check.
+ println("runtime: invalid type ", *typ._string)
+ throw("heapBitsSetType: called with non-pointer type")
+ return
+ }
+ if nw < 2 {
+ // Must write at least 2 words, because the "no scan"
+ // encoding doesn't take effect until the third word.
+ nw = 2
+ }
hbitp := h.bitp // next heap bitmap byte to write
	var hb uintptr // bits being prepared for *h.bitp
// which do not have the marked bits set.
// The leading half-byte is special because it's a half a byte and must be
// manipulated atomically.
- switch h.shift {
+ switch {
default:
throw("heapBitsSetType: unexpected shift")
- case 0:
+ case h.shift == 0:
// Ptrmask and heap bitmap are aligned.
// Handle first byte of bitmap specially.
// The first byte we write out contains the first two words of the object.
b >>= 4
nb -= 4
- case 4:
+ case ptrSize == 8 && h.shift == 4:
// Ptrmask and heap bitmap are misaligned.
// The bits for the first two words are in a byte shared with another object
// and must be updated atomically.
// Note: no bitMarker in hb because the first two words don't get markers from us.
atomicor8(hbitp, uint8(hb))
hbitp = subtractb(hbitp, 1)
-
- // Expand 8-bit chunks of ptrmask into pairs of heap bitmap bytes.
- // We know the object size is a multiple of 2 words but not 4, so the
- // object size minus the 2 words we just handled is a multiple of 4,
- // so we can use non-atomic writes to the heap bitmap for the
- // rest of this code, even for the final fragment or a trailing dead marker byte.
-
- // Loop prepares bits for final byte but stops before writing them,
- // so that in the case where we need to write only part of a byte,
- // the code below the loop can truncate the bitMarked.
- w += 2
+ if w += 2; w >= nw {
+ // We know that there is more data, because we handled 2-word objects above.
+ // This must be at least a 6-word object. If we're out of pointer words,
+ // mark no scan in next bitmap byte and finish.
+ *hbitp = 0
+ goto Phase4
+ }
}
// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
}
}
- const test = false // slow but helpful
- if test {
+Phase4:
+ // Phase 4: all done (goto target).
+
+ if doubleCheck {
// Double-check that bits to be written were written correctly.
// Does not check that other bits were not written, unfortunately.
h := heapBitsForAddr(x)
- nptr := typ.size / ptrSize
+ nptr := typ.ptrdata / ptrSize
+ ndata := typ.size / ptrSize
+ count := dataSize / typ.size
for i := uintptr(0); i <= dataSize/ptrSize; i++ {
- j := i % nptr
+ j := i % ndata
var have, want uint8
- if i == dataSize/ptrSize {
- if dataSize >= size {
- break
- }
- have = (*h.bitp >> h.shift) & 3
- want = 0 // dead bits
+ if i == dataSize/ptrSize && dataSize >= size {
+ break
+ }
+ have = (*h.bitp >> h.shift) & 3
+ if i == dataSize/ptrSize || i/ndata == count-1 && j >= nptr {
+ want = 0 // dead marker
} else {
- have = (*h.bitp >> h.shift) & 3
- if (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
+ if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
want |= bitPointer
}
if i >= 2 {
}
if have != want {
println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
- print("typ.size=", typ.size, " dataSize=", dataSize, " size=", size, "\n")
+ print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
+ print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
h = heapBitsForAddr(x)
print("initial bits h.bitp=", h.bitp, " h.shift=", h.shift, "\n")
- print("p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
+ print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
println("at word", i, "offset", i*ptrSize, "have", have, "want", want)
throw("bad heapBitsSetType")
}
+ if i >= 2 && want == 0 {
+ // found dead marker; the rest is uninitialized
+ break
+ }
h = h.next()
}
}
mask[i/ptrSize] = 1
}
if i >= 2*ptrSize && !hbits.isMarked() {
- mask[i/ptrSize] = 255
+ mask = mask[:i/ptrSize]
break
}
}