Currently we assume alignment to 8 bytes, so we can steal the low 3 bits
of each tagged pointer. This CL requires alignment to 512 bytes, so we can
steal the low 9 bits. That's 6 extra bits of tag!
Aligning to 512 bytes wastes a bit of space, but not egregiously so.
Most of the objects we make tagged pointers to are pretty big.
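
For illustration only (not part of this CL), here is a minimal sketch of the
arithmetic, assuming the 48-bit address space used in tagptr_64bit.go; the
pack/unpack helpers and the sample address are made up for the example:

    // Illustrative sketch, not runtime code: with 512-byte-aligned pointers
    // and a 48-bit address space, 16 high bits plus 9 low bits are free,
    // giving a 25-bit tag.
    package main

    import "fmt"

    const (
        addrBits     = 48
        tagAlignBits = 9                            // pointers are 1<<9 = 512-byte aligned
        tagBits      = 64 - addrBits + tagAlignBits // 25
    )

    func pack(ptr, tag uint64) uint64 {
        return ptr<<(64-addrBits) | tag&(1<<tagBits-1)
    }

    func unpack(v uint64) (ptr, tag uint64) {
        return v >> tagBits << tagAlignBits, v & (1<<tagBits - 1)
    }

    func main() {
        ptr, tag := uint64(0x7f12_3456_7e00), uint64(12345) // 512-byte-aligned address
        p, t := unpack(pack(ptr, tag))
        fmt.Println(p == ptr, t == tag) // true true
    }
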
Update #49405
Change-Id: I66fc7784ac1be5f12f285de1d7851d5a6871fb75
Reviewed-on: https://go-review.googlesource.com/c/go/+/665815
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
// PersistentAlloc allocates some memory that lives outside the Go heap.
// This memory will never be freed; use sparingly.
-func PersistentAlloc(n uintptr) unsafe.Pointer {
- return persistentalloc(n, 0, &memstats.other_sys)
+func PersistentAlloc(n, align uintptr) unsafe.Pointer {
+ return persistentalloc(n, align, &memstats.other_sys)
}
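+
+// TagAlign is tagAlign, exported for testing.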
+const TagAlign = tagAlign
+
// FPCallers works like Callers and uses frame pointer unwinding to populate
// pcBuf with the return addresses of the physical frames on the stack.
func FPCallers(pcBuf []uintptr) int {
func (head *lfstack) push(node *lfnode) {
node.pushcnt++
new := lfstackPack(node, node.pushcnt)
- if node1 := lfstackUnpack(new); node1 != node {
- print("runtime: lfstack.push invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(new), " -> node=", node1, "\n")
- throw("lfstack.push")
- }
for {
old := atomic.Load64((*uint64)(head))
node.next = old
if base, _, _ := findObject(uintptr(unsafe.Pointer(node)), 0, 0); base != 0 {
throw("lfstack node allocated from the heap")
}
- if lfstackUnpack(lfstackPack(node, ^uintptr(0))) != node {
- printlock()
- println("runtime: bad lfnode address", hex(uintptr(unsafe.Pointer(node))))
- throw("bad lfnode address")
- }
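+	// taggedPointerPack (called via lfstackPack) throws if node's address
+	// cannot be packed and unpacked losslessly, so no explicit check is
+	// needed here.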
+ lfstackPack(node, ^uintptr(0))
}
func lfstackPack(node *lfnode, cnt uintptr) uint64 {
- return uint64(taggedPointerPack(unsafe.Pointer(node), cnt))
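+	// Mask the count down to tagBits so taggedPointerPack's round-trip
+	// check doesn't throw once pushcnt grows past the tag width.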
+ return uint64(taggedPointerPack(unsafe.Pointer(node), cnt&(1<<tagBits-1)))
}
func lfstackUnpack(val uint64) *lfnode {
// We require lfstack objects to live outside the heap so that
// checkptr passes on the unsafe shenanigans used.
func allocMyNode(data int) *MyNode {
- n := (*MyNode)(PersistentAlloc(unsafe.Sizeof(MyNode{})))
+ n := (*MyNode)(PersistentAlloc(unsafe.Sizeof(MyNode{}), TagAlign))
LFNodeValidate(&n.LFNode)
n.data = data
return n
// gcBgMarkWorker().
m muintptr
}
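+// gcBgMarkWorkerNodePadded pads gcBgMarkWorkerNode out to tagAlign bytes
+// (less the ASAN red zone) so its heap allocation is sufficiently aligned
+// for use as a tagged pointer. See the comment in tagptr.go.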
+type gcBgMarkWorkerNodePadded struct {
+ gcBgMarkWorkerNode
+ pad [tagAlign - unsafe.Sizeof(gcBgMarkWorkerNode{}) - gcBgMarkWorkerNodeRedZoneSize]byte
+}
+
+const gcBgMarkWorkerNodeRedZoneSize = (16 << 2) * asanenabledBit // redZoneSize(512)
func gcBgMarkWorker(ready chan struct{}) {
gp := getg()
// the stack (see gopark). Prevent deadlock from recursively
// starting GC by disabling preemption.
gp.m.preemptoff = "GC worker init"
- node := new(gcBgMarkWorkerNode)
+ node := &new(gcBgMarkWorkerNodePadded).gcBgMarkWorkerNode // TODO: technically not allowed in the heap. See comment in tagptr.go.
gp.m.preemptoff = ""
node.gp.set(gp)
spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit
)
-type spanSetBlock struct {
+type spanSetBlockHeader struct {
// Free spanSetBlocks are managed via a lock-free stack.
lfnode
// this block. This number is used to help determine when a block
// may be safely recycled.
popped atomic.Uint32
+}
+
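+// spanSetBlockHeader2 pads spanSetBlockHeader out to tagAlign bytes. It is
+// a separate type so the pad size can be expressed in terms of
+// unsafe.Sizeof(spanSetBlockHeader{}).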
+type spanSetBlockHeader2 struct {
+ spanSetBlockHeader
+ pad [tagAlign - unsafe.Sizeof(spanSetBlockHeader{})]byte
+}
+
+type spanSetBlock struct {
+ spanSetBlockHeader2
// spans is the set of spans in this block.
spans [spanSetBlockEntries]atomicMSpanPointer
if s := (*spanSetBlock)(p.stack.pop()); s != nil {
return s
}
- return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys))
+ return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), max(cpu.CacheLineSize, tagAlign), &memstats.gcMiscSys))
}
// free returns a spanSetBlock back to the pool.
func (c *pollCache) alloc() *pollDesc {
lock(&c.lock)
if c.first == nil {
- const pdSize = unsafe.Sizeof(pollDesc{})
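+		// Pad each pollDesc to tagAlign so that every entry in the
+		// persistentalloc'd block below is tagAlign-aligned, as
+		// required for tagged pointers.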
+ type pollDescPadded struct {
+ pollDesc
+ pad [tagAlign - unsafe.Sizeof(pollDesc{})]byte
+ }
+ const pdSize = unsafe.Sizeof(pollDescPadded{})
n := pollBlockSize / pdSize
if n == 0 {
n = 1
}
// Must be in non-GC memory because can be referenced
// only from epoll/kqueue internals.
- mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
+ mem := persistentalloc(n*pdSize, tagAlign, &memstats.other_sys)
for i := uintptr(0); i < n; i++ {
pd := (*pollDesc)(add(mem, i*pdSize))
lockInit(&pd.lock, lockRankPollDesc)
// taggedPointer is a pointer with a numeric tag.
// The size of the numeric tag is GOARCH-dependent,
-// currently at least 10 bits.
+// currently at least 16 bits.
// This should only be used with pointers allocated outside the Go heap.
type taggedPointer uint64
// minTagBits is the minimum number of tag bits that we expect.
-const minTagBits = 10
+const minTagBits = 16
+
+// The number of low pointer bits we can steal for the tag. We require that
+// every pointer we tag is aligned to at least 1<<tagAlignBits bytes.
+// Currently the long pole in the tent is pollDesc at 280 bytes: setting
+// tagAlignBits to 9 rounds its allocations up to 512 bytes.
+// gcBgMarkWorkerNode is also small, but we don't make many of those,
+// so it is ok to waste space on them.
+const tagAlignBits = 9
+const tagAlign = 1 << tagAlignBits
// The number of bits stored in the numeric tag of a taggedPointer
const taggedPointerBits = 32
+// The number of bits allowed in a tag.
+const tagBits = 32
+
// On 32-bit systems, taggedPointer has a 32-bit pointer and 32-bit count.
// taggedPointerPack creates a taggedPointer from a pointer and a tag.
// get to really high addresses and panic if it does.
addrBits = 48
- // In addition to the 16 bits taken from the top, we can take 3 from the
- // bottom, because node must be pointer-aligned, giving a total of 19 bits
- // of count.
- tagBits = 64 - addrBits + 3
+ // In addition to the 16 bits taken from the top, we can take 9 from the
+ // bottom, because we require pointers to be well-aligned (see tagptr.go:tagAlignBits).
+ // That gives us a total of 25 bits for the tag.
+ tagBits = 64 - addrBits + tagAlignBits
// On AIX, 64-bit addresses are split into 36-bit segment number and 28-bit
// offset in segment. Segment numbers in the range 0x0A0000000-0x0AFFFFFFF(LSA)
// We assume all tagged addresses are from memory allocated with mmap.
// We use one bit to distinguish between the two ranges.
aixAddrBits = 57
- aixTagBits = 64 - aixAddrBits + 3
+ aixTagBits = 64 - aixAddrBits + tagAlignBits
// riscv64 SV57 mode gives 56 bits of userspace VA.
// tagged pointer code supports it,
// but broader support for SV57 mode is incomplete,
// and there may be other issues (see #54104).
riscv64AddrBits = 56
- riscv64TagBits = 64 - riscv64AddrBits + 3
+ riscv64TagBits = 64 - riscv64AddrBits + tagAlignBits
)
// The number of bits stored in the numeric tag of a taggedPointer
// taggedPointerPack creates a taggedPointer from a pointer and a tag.
// It throws if the pointer or the tag cannot be recovered exactly.
func taggedPointerPack(ptr unsafe.Pointer, tag uintptr) taggedPointer {
+ var t taggedPointer
if GOOS == "aix" {
if GOARCH != "ppc64" {
throw("check this code for aix on non-ppc64")
}
- return taggedPointer(uint64(uintptr(ptr))<<(64-aixAddrBits) | uint64(tag&(1<<aixTagBits-1)))
+ t = taggedPointer(uint64(uintptr(ptr))<<(64-aixAddrBits) | uint64(tag&(1<<aixTagBits-1)))
+ } else if GOARCH == "riscv64" {
+ t = taggedPointer(uint64(uintptr(ptr))<<(64-riscv64AddrBits) | uint64(tag&(1<<riscv64TagBits-1)))
+ } else {
+ t = taggedPointer(uint64(uintptr(ptr))<<(64-addrBits) | uint64(tag&(1<<tagBits-1)))
}
- if GOARCH == "riscv64" {
- return taggedPointer(uint64(uintptr(ptr))<<(64-riscv64AddrBits) | uint64(tag&(1<<riscv64TagBits-1)))
+ if t.pointer() != ptr || t.tag() != tag {
+ print("runtime: taggedPointerPack invalid packing: ptr=", ptr, " tag=", hex(tag), " packed=", hex(t), " -> ptr=", t.pointer(), " tag=", hex(t.tag()), "\n")
+ throw("taggedPointerPack")
}
- return taggedPointer(uint64(uintptr(ptr))<<(64-addrBits) | uint64(tag&(1<<tagBits-1)))
+ return t
}
// Pointer returns the pointer from a taggedPointer.
if GOARCH == "amd64" {
// amd64 systems can place the stack above the VA hole, so we need to sign extend
// val before unpacking.
- return unsafe.Pointer(uintptr(int64(tp) >> tagBits << 3))
+ return unsafe.Pointer(uintptr(int64(tp) >> tagBits << tagAlignBits))
}
if GOOS == "aix" {
- return unsafe.Pointer(uintptr((tp >> aixTagBits << 3) | 0xa<<56))
+ return unsafe.Pointer(uintptr((tp >> aixTagBits << tagAlignBits) | 0xa<<56))
}
if GOARCH == "riscv64" {
- return unsafe.Pointer(uintptr(tp >> riscv64TagBits << 3))
+ return unsafe.Pointer(uintptr(tp >> riscv64TagBits << tagAlignBits))
}
- return unsafe.Pointer(uintptr(tp >> tagBits << 3))
+ return unsafe.Pointer(uintptr(tp >> tagBits << tagAlignBits))
}
// Tag returns the tag from a taggedPointer.