From: Russ Cox Date: Sat, 15 Nov 2014 13:00:38 +0000 (-0500) Subject: [dev.garbage] all: merge dev.cc into dev.garbage X-Git-Tag: go1.5beta1~2684^2~10 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0fcf54b3d2bc42a947c65e9a520d078b671f8432;p=gostls13.git [dev.garbage] all: merge dev.cc into dev.garbage The garbage collector is now written in Go. There is plenty to clean up (just like on dev.cc). all.bash passes on darwin/amd64, darwin/386, linux/amd64, linux/386. TBR=rlh R=austin, rlh, bradfitz CC=golang-codereviews https://golang.org/cl/173250043 --- 0fcf54b3d2bc42a947c65e9a520d078b671f8432 diff --cc src/runtime/lfstack.go index 0000000000,4a20fff9d8..a4ad8a10c6 mode 000000,100644..100644 --- a/src/runtime/lfstack.go +++ b/src/runtime/lfstack.go @@@ -1,0 -1,40 +1,36 @@@ + // Copyright 2012 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + // Lock-free stack. + // The following code runs only on g0 stack. + + package runtime + + import "unsafe" + + func lfstackpush(head *uint64, node *lfnode) { + node.pushcnt++ + new := lfstackPack(node, node.pushcnt) + for { + old := atomicload64(head) - node.next, _ = lfstackUnpack(old) ++ node.next = old + if cas64(head, old, new) { + break + } + } + } + + func lfstackpop(head *uint64) unsafe.Pointer { + for { + old := atomicload64(head) + if old == 0 { + return nil + } + node, _ := lfstackUnpack(old) - node2 := (*lfnode)(atomicloadp(unsafe.Pointer(&node.next))) - new := uint64(0) - if node2 != nil { - new = lfstackPack(node2, node2.pushcnt) - } - if cas64(head, old, new) { ++ next := atomicload64(&node.next) ++ if cas64(head, old, next) { + return unsafe.Pointer(node) + } + } + } diff --cc src/runtime/malloc.go index fab8cf2695,20cb6818d2..f90a8f84a3 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@@ -306,18 -295,6 +297,17 @@@ func mallocgc(size uintptr, typ *_type } } marked: + + // GCmarkterminate allocates black + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + if gcphase == _GCmarktermination { - mp := acquirem() - mp.ptrarg[0] = x - onM(gcmarknewobject_m) - releasem(mp) ++ systemstack(func() { ++ gcmarknewobject_m(uintptr(x)) ++ }) + } + if raceenabled { racemalloc(x, size) } @@@ -358,37 -335,6 +348,36 @@@ return x } +func loadPtrMask(typ *_type) []uint8 { + var ptrmask *uint8 + nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize + if typ.kind&kindGCProg != 0 { + masksize := nptr + if masksize%2 != 0 { + masksize *= 2 // repeated + } + masksize = masksize * pointersPerByte / 8 // 4 bits per word + masksize++ // unroll flag in the beginning + if masksize > maxGCMask && typ.gc[1] != 0 { + // write barriers have not been updated to deal with this case yet. 
+ gothrow("maxGCMask too small for now") + } + ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0]))) + // Check whether the program is already unrolled + // by checking if the unroll flag byte is set + maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask))) + if *(*uint8)(unsafe.Pointer(&maskword)) == 0 { - mp := acquirem() - mp.ptrarg[0] = unsafe.Pointer(typ) - onM(unrollgcprog_m) - releasem(mp) ++ systemstack(func() { ++ unrollgcprog_m(typ) ++ }) + } + ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte + } else { + ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask + } + return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2] +} + // implementation of new builtin func newobject(typ *_type) unsafe.Pointer { flags := uint32(0) @@@ -483,20 -429,7 +472,21 @@@ func gogc(force int32) mp = acquirem() mp.gcing = 1 releasem(mp) + - onM(stoptheworld) - onM(finishsweep_m) // finish sweep before we start concurrent scan. - if false { // To turn on concurrent scan and mark set to true... - onM(starttheworld) + systemstack(stoptheworld) ++ systemstack(finishsweep_m) // finish sweep before we start concurrent scan. ++ if false { // To turn on concurrent scan and mark set to true... ++ systemstack(starttheworld) + // Do a concurrent heap scan before we stop the world. - onM(gcscan_m) - onM(stoptheworld) - onM(gcinstallmarkwb_m) - onM(starttheworld) - onM(gcmark_m) - onM(stoptheworld) - onM(gcinstalloffwb_m) ++ systemstack(gcscan_m) ++ systemstack(stoptheworld) ++ systemstack(gcinstallmarkwb_m) ++ systemstack(starttheworld) ++ systemstack(gcmark_m) ++ systemstack(stoptheworld) ++ systemstack(gcinstalloffwb_m) + } ++ if mp != acquirem() { gothrow("gogc: rescheduled") } @@@ -512,23 -445,17 +502,21 @@@ if debug.gctrace > 1 { n = 2 } ++ eagersweep := force >= 2 for i := 0; i < n; i++ { if i > 0 { startTime = nanotime() } // switch to g0, call gc, then switch back - mp.scalararg[0] = uintptr(uint32(startTime)) // low 32 bits - mp.scalararg[1] = uintptr(startTime >> 32) // high 32 bits - if force >= 2 { - mp.scalararg[2] = 1 // eagersweep - } else { - mp.scalararg[2] = 0 - } - onM(gc_m) - eagersweep := force >= 2 + systemstack(func() { + gc_m(startTime, eagersweep) + }) } - onM(gccheckmark_m) ++ systemstack(func() { ++ gccheckmark_m(startTime, eagersweep) ++ }) + // all done mp.gcing = 0 semrelease(&worldsema) @@@ -543,14 -470,6 +531,14 @@@ } } +func GCcheckmarkenable() { - onM(gccheckmarkenable_m) ++ systemstack(gccheckmarkenable_m) +} + +func GCcheckmarkdisable() { - onM(gccheckmarkdisable_m) ++ systemstack(gccheckmarkdisable_m) +} + // GC runs a garbage collection. func GC() { gogc(2) diff --cc src/runtime/malloc2.go index 0000000000,e4bd963d30..4ac0207b1e mode 000000,100644..100644 --- a/src/runtime/malloc2.go +++ b/src/runtime/malloc2.go @@@ -1,0 -1,475 +1,473 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + // Memory allocator, based on tcmalloc. + // http://goog-perftools.sourceforge.net/doc/tcmalloc.html + + // The main allocator works in runs of pages. + // Small allocation sizes (up to and including 32 kB) are + // rounded to one of about 100 size classes, each of which + // has its own free list of objects of exactly that size. + // Any free page of memory can be split into a set of objects + // of one size class, which are then managed using free list + // allocators. 
+ // + // The allocator's data structures are: + // + // FixAlloc: a free-list allocator for fixed-size objects, + // used to manage storage used by the allocator. + // MHeap: the malloc heap, managed at page (4096-byte) granularity. + // MSpan: a run of pages managed by the MHeap. + // MCentral: a shared free list for a given size class. + // MCache: a per-thread (in Go, per-P) cache for small objects. + // MStats: allocation statistics. + // + // Allocating a small object proceeds up a hierarchy of caches: + // + // 1. Round the size up to one of the small size classes + // and look in the corresponding MCache free list. + // If the list is not empty, allocate an object from it. + // This can all be done without acquiring a lock. + // + // 2. If the MCache free list is empty, replenish it by + // taking a bunch of objects from the MCentral free list. + // Moving a bunch amortizes the cost of acquiring the MCentral lock. + // + // 3. If the MCentral free list is empty, replenish it by + // allocating a run of pages from the MHeap and then + // chopping that memory into a objects of the given size. + // Allocating many objects amortizes the cost of locking + // the heap. + // + // 4. If the MHeap is empty or has no page runs large enough, + // allocate a new group of pages (at least 1MB) from the + // operating system. Allocating a large run of pages + // amortizes the cost of talking to the operating system. + // + // Freeing a small object proceeds up the same hierarchy: + // + // 1. Look up the size class for the object and add it to + // the MCache free list. + // + // 2. If the MCache free list is too long or the MCache has + // too much memory, return some to the MCentral free lists. + // + // 3. If all the objects in a given span have returned to + // the MCentral list, return that span to the page heap. + // + // 4. If the heap has too much memory, return some to the + // operating system. + // + // TODO(rsc): Step 4 is not implemented. + // + // Allocating and freeing a large object uses the page heap + // directly, bypassing the MCache and MCentral free lists. + // + // The small objects on the MCache and MCentral free lists + // may or may not be zeroed. They are zeroed if and only if + // the second word of the object is zero. A span in the + // page heap is zeroed unless s->needzero is set. When a span + // is allocated to break into small objects, it is zeroed if needed + // and s->needzero is set. There are two main benefits to delaying the + // zeroing this way: + // + // 1. stack frames allocated from the small object lists + // or the page heap can avoid zeroing altogether. + // 2. the cost of zeroing when reusing a small object is + // charged to the mutator, not the garbage collector. + // + // This C code was written with an eye toward translating to Go + // in the future. Methods have the form Type_Method(Type *t, ...). + + const ( + _PageShift = 13 + _PageSize = 1 << _PageShift + _PageMask = _PageSize - 1 + ) + + const ( + // _64bit = 1 on 64-bit systems, 0 on 32-bit systems + _64bit = 1 << (^uintptr(0) >> 63) / 2 + + // Computed constant. The definition of MaxSmallSize and the + // algorithm in msize.c produce some number of different allocation + // size classes. NumSizeClasses is that number. It's needed here + // because there are static arrays of this length; when msize runs its + // size choosing algorithm it double-checks that NumSizeClasses agrees. + _NumSizeClasses = 67 + + // Tunable constants. 
+ _MaxSmallSize = 32 << 10 + + // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc. + _TinySize = 16 + _TinySizeClass = 2 + + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. + _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + + // Per-P, per order stack segment cache size. + _StackCacheSize = 32 * 1024 + + // Number of orders that get caching. Order 0 is FixedStack + // and each successive order is twice as large. + _NumStackOrders = 3 + + // Number of bits in page to span calculations (4k pages). + // On Windows 64-bit we limit the arena to 32GB or 35 bits. + // Windows counts memory used by page table into committed memory + // of the process, so we can't reserve too much memory. + // See http://golang.org/issue/5402 and http://golang.org/issue/5236. + // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits. + // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. + _MHeapMap_TotalBits = (_64bit*_Windows)*35 + (_64bit*(1-_Windows))*37 + (1-_64bit)*32 + _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift + + _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1) + + // Max number of threads to run garbage collection. + // 2, 3, and 4 are all plausible maximums depending + // on the hardware details of the machine. The garbage + // collector scales well to 32 cpus. + _MaxGcproc = 32 + ) + + // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) + type mlink struct { + next *mlink + } + + // sysAlloc obtains a large chunk of zeroed memory from the + // operating system, typically on the order of a hundred kilobytes + // or a megabyte. + // NOTE: sysAlloc returns OS-aligned memory, but the heap allocator + // may use larger alignment, so the caller must be careful to realign the + // memory obtained by sysAlloc. + // + // SysUnused notifies the operating system that the contents + // of the memory region are no longer needed and can be reused + // for other purposes. + // SysUsed notifies the operating system that the contents + // of the memory region are needed again. + // + // SysFree returns it unconditionally; this is only used if + // an out-of-memory error has been detected midway through + // an allocation. It is okay if SysFree is a no-op. + // + // SysReserve reserves address space without allocating memory. + // If the pointer passed to it is non-nil, the caller wants the + // reservation there, but SysReserve can still choose another + // location if that one is unavailable. On some systems and in some + // cases SysReserve will simply check that the address space is + // available and not actually reserve it. If SysReserve returns + // non-nil, it sets *reserved to true if the address space is + // reserved, false if it has merely been checked. + // NOTE: SysReserve returns OS-aligned memory, but the heap allocator + // may use larger alignment, so the caller must be careful to realign the + // memory obtained by sysAlloc. + // + // SysMap maps previously reserved address space for use. + // The reserved argument is true if the address space was really + // reserved, not merely checked. + // + // SysFault marks a (already sysAlloc'd) region to fault + // if accessed. Used only for debugging the runtime. + + // FixAlloc is a simple free-list allocator for fixed size objects. + // Malloc uses a FixAlloc wrapped around sysAlloc to manages its + // MCache and MSpan objects. 
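
Editor's note: FixAlloc, described here, is a plain free-list allocator for fixed-size objects: it carves chunks obtained from sysAlloc, threads freed objects onto a list through their first word, and hands them back out. The sketch below is a safe-Go analogue of that shape, not the runtime's fixalloc (which works on raw memory and returns unzeroed objects); objsPerChunk and the node type are made up for the example.

package main

import "fmt"

const objsPerChunk = 4 // the real FixAlloc grows in 16 kB chunks (_FixAllocChunk)

// node plays the role of mlink: freed objects are threaded into a list
// through their first field, so their previous contents are "smashed".
type node struct {
	next *node
	val  int
}

// fixAlloc hands out fixed-size objects from a free list, refilling the
// list a chunk at a time, like the runtime's fixalloc but in safe Go.
type fixAlloc struct {
	free  *node
	inuse int
}

func (f *fixAlloc) alloc() *node {
	if f.free == nil {
		// Out of freed objects: grab a new chunk and thread it onto the list.
		chunk := make([]node, objsPerChunk)
		for i := range chunk {
			chunk[i].next = f.free
			f.free = &chunk[i]
		}
	}
	n := f.free
	f.free = n.next
	f.inuse++
	*n = node{} // caller sees zeroed memory here, unlike the real FixAlloc
	return n
}

func (f *fixAlloc) freeObj(n *node) {
	n.next = f.free // first word is overwritten by the list link
	f.free = n
	f.inuse--
}

func main() {
	var f fixAlloc
	a, b := f.alloc(), f.alloc()
	a.val, b.val = 1, 2
	f.freeObj(a)
	c := f.alloc() // reuses a's slot
	fmt.Println(c == a, f.inuse)
}

This mirrors the runtime comment that callers may keep state in the object but the first word is smashed by freeing and reallocating.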
+ // + // Memory returned by FixAlloc_Alloc is not zeroed. + // The caller is responsible for locking around FixAlloc calls. + // Callers can keep state in the object but the first word is + // smashed by freeing and reallocating. + type fixalloc struct { + size uintptr + first unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned + arg unsafe.Pointer + list *mlink + chunk *byte + nchunk uint32 + inuse uintptr // in-use bytes now + stat *uint64 + } + + // Statistics. + // Shared with Go: if you edit this structure, also edit type MemStats in mem.go. + type mstats struct { + // General statistics. + alloc uint64 // bytes allocated and still in use + total_alloc uint64 // bytes allocated (even if freed) + sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) + nlookup uint64 // number of pointer lookups + nmalloc uint64 // number of mallocs + nfree uint64 // number of frees + + // Statistics about malloc heap. + // protected by mheap.lock + heap_alloc uint64 // bytes allocated and still in use + heap_sys uint64 // bytes obtained from system + heap_idle uint64 // bytes in idle spans + heap_inuse uint64 // bytes in non-idle spans + heap_released uint64 // bytes released to the os + heap_objects uint64 // total number of allocated objects + + // Statistics about allocation of low-level fixed-size structures. + // Protected by FixAlloc locks. + stacks_inuse uint64 // this number is included in heap_inuse above + stacks_sys uint64 // always 0 in mstats + mspan_inuse uint64 // mspan structures + mspan_sys uint64 + mcache_inuse uint64 // mcache structures + mcache_sys uint64 + buckhash_sys uint64 // profiling bucket hash table + gc_sys uint64 + other_sys uint64 + + // Statistics about garbage collector. + // Protected by mheap or stopping the world during GC. + next_gc uint64 // next gc (in heap_alloc time) + last_gc uint64 // last gc (in absolute time) + pause_total_ns uint64 + pause_ns [256]uint64 // circular buffer of recent gc pause lengths + pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970) + numgc uint32 + enablegc bool + debuggc bool + + // Statistics about allocation size classes. + + by_size [_NumSizeClasses]struct { + size uint32 + nmalloc uint64 + nfree uint64 + } + + tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly + } + + var memstats mstats + + // Size classes. Computed and initialized by InitSizes. + // + // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, + // 1 <= sizeclass < NumSizeClasses, for n. + // Size class 0 is reserved to mean "not small". + // + // class_to_size[i] = largest size in class i + // class_to_allocnpages[i] = number of pages to allocate when + // making new objects in class i + + var class_to_size [_NumSizeClasses]int32 + var class_to_allocnpages [_NumSizeClasses]int32 + var size_to_class8 [1024/8 + 1]int8 + var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 + + type mcachelist struct { + list *mlink + nlist uint32 + } + + type stackfreelist struct { + list *mlink // linked list of free stacks + size uintptr // total size of stacks in list + } + + // Per-thread (in Go, per-P) cache for small objects. + // No locking needed because it is per-thread (per-P). + type mcache struct { + // The following members are accessed on every malloc, + // so they are grouped here for better caching. 
+ next_sample int32 // trigger heap sample after allocating this many bytes + local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap + // Allocator cache for tiny objects w/o pointers. + // See "Tiny allocator" comment in malloc.goc. + tiny *byte + tinysize uintptr + local_tinyallocs uintptr // number of tiny allocs not counted in other stats + + // The rest is not accessed on every malloc. + alloc [_NumSizeClasses]*mspan // spans to allocate from + + stackcache [_NumStackOrders]stackfreelist + + sudogcache *sudog + - gcworkbuf unsafe.Pointer - + // Local allocator stats, flushed during GC. + local_nlookup uintptr // number of pointer lookups + local_largefree uintptr // bytes freed for large objects (>maxsmallsize) + local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) + local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + } + + const ( + _KindSpecialFinalizer = 1 + _KindSpecialProfile = 2 + // Note: The finalizer special must be first because if we're freeing + // an object, a finalizer special will cause the freeing operation + // to abort, and we want to keep the other special records around + // if that happens. + ) + + type special struct { + next *special // linked list in span + offset uint16 // span offset of object + kind byte // kind of special + } + + // The described object has a finalizer set for it. + type specialfinalizer struct { + special special + fn *funcval + nret uintptr + fint *_type + ot *ptrtype + } + + // The described object is being heap profiled. + type specialprofile struct { + special special + b *bucket + } + + // An MSpan is a run of pages. + const ( + _MSpanInUse = iota // allocated for garbage collected heap + _MSpanStack // allocated for use by stack allocator + _MSpanFree + _MSpanListHead + _MSpanDead + ) + + type mspan struct { + next *mspan // in a span linked list + prev *mspan // in a span linked list + start pageID // starting page number + npages uintptr // number of pages in span + freelist *mlink // list of free objects + // sweep generation: + // if sweepgen == h->sweepgen - 2, the span needs sweeping + // if sweepgen == h->sweepgen - 1, the span is currently being swept + // if sweepgen == h->sweepgen, the span is swept and ready to use + // h->sweepgen is incremented by 2 after every GC + sweepgen uint32 + ref uint16 // capacity - number of objects in freelist + sizeclass uint8 // size class + incache bool // being used by an mcache + state uint8 // mspaninuse etc + needzero uint8 // needs to be zeroed before allocation + elemsize uintptr // computed from sizeclass or from npages + unusedsince int64 // first time spotted by gc in mspanfree state + npreleased uintptr // number of pages released to the os + limit uintptr // end of data in span + speciallock mutex // guards specials list + specials *special // linked list of special records sorted by offset. + } + + // Every MSpan is in one doubly-linked list, + // either one of the MHeap's free lists or one of the + // MCentral's span lists. We use empty MSpan structures as list heads. + + // Central list of free objects of a given size. + type mcentral struct { + lock mutex + sizeclass int32 + nonempty mspan // list of spans with a free object + empty mspan // list of spans with no free objects (or cached in an mcache) + } + + // Main malloc heap. + // The heap itself is the "free[]" and "large" arrays, + // but all the other global data is here too. 
+ type mheap struct { + lock mutex + free [_MaxMHeapList]mspan // free lists of given length + freelarge mspan // free lists length >= _MaxMHeapList + busy [_MaxMHeapList]mspan // busy lists of large objects of given length + busylarge mspan // busy lists of large objects length >= _MaxMHeapList + allspans **mspan // all spans out there + gcspans **mspan // copy of allspans referenced by gc marker or sweeper + nspan uint32 + sweepgen uint32 // sweep generation, see comment in mspan + sweepdone uint32 // all spans are swept + + // span lookup + spans **mspan + spans_mapped uintptr + + // range of addresses we might see in the heap + bitmap uintptr + bitmap_mapped uintptr + arena_start uintptr + arena_used uintptr + arena_end uintptr + arena_reserved bool + + // central free lists for small size classes. + // the padding makes sure that the MCentrals are + // spaced CacheLineSize bytes apart, so that each MCentral.lock + // gets its own cache line. + central [_NumSizeClasses]struct { + mcentral mcentral + pad [_CacheLineSize]byte + } + + spanalloc fixalloc // allocator for span* + cachealloc fixalloc // allocator for mcache* + specialfinalizeralloc fixalloc // allocator for specialfinalizer* + specialprofilealloc fixalloc // allocator for specialprofile* + speciallock mutex // lock for sepcial record allocators. + + // Malloc stats. + largefree uint64 // bytes freed for large objects (>maxsmallsize) + nlargefree uint64 // number of frees for large objects (>maxsmallsize) + nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) + } + + var mheap_ mheap + + const ( + // flags to malloc + _FlagNoScan = 1 << 0 // GC doesn't have to scan object + _FlagNoZero = 1 << 1 // don't zero memory + ) + + // NOTE: Layout known to queuefinalizer. + type finalizer struct { + fn *funcval // function to call + arg unsafe.Pointer // ptr to object + nret uintptr // bytes of return values from fn + fint *_type // type of first argument of fn + ot *ptrtype // type of ptr to object + } + + type finblock struct { + alllink *finblock + next *finblock + cnt int32 + cap int32 + fin [1]finalizer + } + + // Information from the compiler about the layout of stack frames. + type bitvector struct { + n int32 // # of bits + bytedata *uint8 + } + + type stackmap struct { + n int32 // number of bitmaps + nbit int32 // number of bits in each bitmap + bytedata [0]byte // bitmaps, each starting on a 32-bit boundary + } + + // Returns pointer map data for the given stackmap index + // (the index is encoded in PCDATA_StackMapIndex). + + // defined in mgc0.go diff --cc src/runtime/mcache.go index 0000000000,d3afef6be6..08b1bc3597 mode 000000,100644..100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@@ -1,0 -1,86 +1,91 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + // Per-P malloc cache for small objects. + // + // See malloc.h for an overview. + + package runtime + + import "unsafe" + + // dummy MSpan that contains no free objects. + var emptymspan mspan + + func allocmcache() *mcache { + lock(&mheap_.lock) + c := (*mcache)(fixAlloc_Alloc(&mheap_.cachealloc)) + unlock(&mheap_.lock) + memclr(unsafe.Pointer(c), unsafe.Sizeof(*c)) + for i := 0; i < _NumSizeClasses; i++ { + c.alloc[i] = &emptymspan + } + + // Set first allocation sample size. 
+ rate := MemProfileRate + if rate > 0x3fffffff { // make 2*rate not overflow + rate = 0x3fffffff + } + if rate != 0 { + c.next_sample = int32(int(fastrand1()) % (2 * rate)) + } + + return c + } + + func freemcache(c *mcache) { + systemstack(func() { + mCache_ReleaseAll(c) + stackcache_clear(c) - gcworkbuffree(c.gcworkbuf) ++ ++ // NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate ++ // with the stealing of gcworkbufs during garbage collection to avoid ++ // a race where the workbuf is double-freed. ++ // gcworkbuffree(c.gcworkbuf) ++ + lock(&mheap_.lock) + purgecachedstats(c) + fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c)) + unlock(&mheap_.lock) + }) + } + + // Gets a span that has a free object in it and assigns it + // to be the cached span for the given sizeclass. Returns this span. + func mCache_Refill(c *mcache, sizeclass int32) *mspan { + _g_ := getg() + + _g_.m.locks++ + // Return the current cached span to the central lists. + s := c.alloc[sizeclass] + if s.freelist != nil { + gothrow("refill on a nonempty span") + } + if s != &emptymspan { + s.incache = false + } + + // Get a new cached span from the central lists. + s = mCentral_CacheSpan(&mheap_.central[sizeclass].mcentral) + if s == nil { + gothrow("out of memory") + } + if s.freelist == nil { + println(s.ref, (s.npages<<_PageShift)/s.elemsize) + gothrow("empty span") + } + c.alloc[sizeclass] = s + _g_.m.locks-- + return s + } + + func mCache_ReleaseAll(c *mcache) { + for i := 0; i < _NumSizeClasses; i++ { + s := c.alloc[i] + if s != &emptymspan { + mCentral_UncacheSpan(&mheap_.central[i].mcentral, s) + c.alloc[i] = &emptymspan + } + } + } diff --cc src/runtime/mgc.go index 0000000000,f44d7ddbce..57bd8b3563 mode 000000,100644..100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@@ -1,0 -1,1798 +1,2422 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + // TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup. + // It has gotten completely out of control. + + // Garbage collector (GC). + // -// GC is: -// - mark&sweep -// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc) -// - parallel (up to MaxGcproc threads) -// - partially concurrent (mark is stop-the-world, while sweep is concurrent) -// - non-moving/non-compacting -// - full (non-partial) ++// The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple GC ++// thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is ++// non-generational and non-compacting. Allocation is done using size segregated per P allocation ++// areas to minimize fragmentation while eliminating locks in the common case. + // -// GC rate. -// Next GC is after we've allocated an extra amount of memory proportional to -// the amount already in use. The proportion is controlled by GOGC environment variable -// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M -// (this mark is tracked in next_gc variable). This keeps the GC cost in linear -// proportion to the allocation cost. Adjusting GOGC just changes the linear constant -// (and also the amount of extra memory used). ++// The algorithm decomposes into several steps. ++// This is a high level description of the algorithm being used. For an overview of GC a good ++// place to start is Richard Jones' gchandbook.org. 
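
Editor's note: the new collector described here is a concurrent mark-and-sweep built around a write barrier in the Dijkstra on-the-fly tradition cited just below. The heart of it is an insertion-style barrier: when the mutator stores a pointer while marking is running, the pointed-to object is shaded grey so it cannot stay hidden behind an already-black object. The following is a schematic, single-goroutine sketch of that invariant only; none of these names correspond to runtime code, and the real barrier, work buffers, and phase handshakes are far more involved.

package main

import "fmt"

type color int

const (
	white color = iota // not yet reached
	grey               // reached, fields not yet scanned
	black              // reached and fully scanned
)

type object struct {
	name string
	col  color
	refs []*object
}

var work []*object // the grey work list (stand-in for the workbufs)

func shade(o *object) {
	if o != nil && o.col == white {
		o.col = grey
		work = append(work, o)
	}
}

// writePointer is the mutator's pointer store with the barrier enabled:
// shade the new target as it becomes reachable from dst.
func writePointer(dst *object, i int, src *object) {
	shade(src)
	dst.refs[i] = src
}

// drain blackens grey objects, shading everything they point to.
func drain() {
	for len(work) > 0 {
		o := work[len(work)-1]
		work = work[:len(work)-1]
		for _, r := range o.refs {
			shade(r)
		}
		o.col = black
	}
}

func main() {
	root := &object{name: "root", refs: make([]*object, 1)}
	hidden := &object{name: "hidden"}

	shade(root)
	drain() // root is black, hidden is still white

	// The mutator now stores a pointer to hidden into the black root.
	// Without the barrier, hidden would stay white and be swept.
	writePointer(root, 0, hidden)
	drain()

	fmt.Println("hidden is black:", hidden.col == black)
}

This corresponds to steps 5 and 6 of the phase list that follows: the barrier only greys, the background mark loop blackens, and once mark termination switches allocation to black the number of unmarked reachable objects can only shrink.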
++// ++// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see ++// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. ++// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975. ++// For journal quality proofs that these steps are complete, correct, and terminate see ++// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. ++// Concurrency and Computation: Practice and Experience 15(3-5), 2003. + // ++// 0. Set phase = GCscan from GCoff. ++// 1. Wait for all P's to acknowledge phase change. ++// At this point all goroutines have passed through a GC safepoint and ++// know we are in the GCscan phase. ++// 2. GC scans all goroutine stacks, mark and enqueues all encountered pointers ++// (marking avoids most duplicate enqueuing but races may produce duplication which is benign). ++// Preempted goroutines are scanned before P schedules next goroutine. ++// 3. Set phase = GCmark. ++// 4. Wait for all P's to acknowledge phase change. ++// 5. Now write barrier marks and enqueues black, grey, or white to white pointers. ++// Malloc still allocates white (non-marked) objects. ++// 6. Meanwhile GC transitively walks the heap marking reachable objects. ++// 7. When GC finishes marking heap, it preempts P's one-by-one and ++// retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine ++// currently scheduled on the P). ++// 8. Once the GC has exhausted all available marking work it sets phase = marktermination. ++// 9. Wait for all P's to acknowledge phase change. ++// 10. Malloc now allocates black objects, so number of unmarked reachable objects ++// monotonically decreases. ++// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects. ++// 12. When GC completes a full cycle over P's and discovers no new grey ++// objects, (which means all reachable objects are marked) set phase = GCsweep. ++// 13. Wait for all P's to acknowledge phase change. ++// 14. Now malloc allocates white (but sweeps spans before use). ++// Write barrier becomes nop. ++// 15. GC does background sweeping, see description below. ++// 16. When sweeping is complete set phase to GCoff. ++// 17. When sufficient allocation has taken place replay the sequence starting at 0 above, ++// see discussion of GC rate below. ++ ++// Changing phases. ++// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase. ++// All phase action must be benign in the presence of a change. ++// Starting with GCoff ++// GCoff to GCscan ++// GSscan scans stacks and globals greying them and never marks an object black. ++// Once all the P's are aware of the new phase they will scan gs on preemption. ++// This means that the scanning of preempted gs can't start until all the Ps ++// have acknowledged. ++// GCscan to GCmark ++// GCMark turns on the write barrier which also only greys objects. No scanning ++// of objects (making them black) can happen until all the Ps have acknowledged ++// the phase change. ++// GCmark to GCmarktermination ++// The only change here is that we start allocating black so the Ps must acknowledge ++// the change before we begin the termination algorithm ++// GCmarktermination to GSsweep ++// Object currently on the freelist must be marked black for this to work. ++// Are things on the free lists black or white? How does the sweep phase work? ++ + // Concurrent sweep. 
+ // The sweep phase proceeds concurrently with normal program execution. + // The heap is swept span-by-span both lazily (when a goroutine needs another span) + // and concurrently in a background goroutine (this helps programs that are not CPU bound). + // However, at the end of the stop-the-world GC phase we don't know the size of the live heap, + // and so next_gc calculation is tricky and happens as follows. + // At the end of the stop-the-world phase next_gc is conservatively set based on total + // heap size; all spans are marked as "needs sweeping". + // Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory. + // The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc + // closer to the target value. However, this is not enough to avoid over-allocating memory. + // Consider that a goroutine wants to allocate a new span for a large object and + // there are no free swept spans, but there are small-object unswept spans. + // If the goroutine naively allocates a new span, it can surpass the yet-unknown + // target next_gc value. In order to prevent such cases (1) when a goroutine needs + // to allocate a new small-object span, it sweeps small-object spans for the same + // object size until it frees at least one object; (2) when a goroutine needs to + // allocate large-object span from heap, it sweeps spans until it frees at least + // that many pages into heap. Together these two measures ensure that we don't surpass + // target next_gc value by a large margin. There is an exception: if a goroutine sweeps + // and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span, + // but there can still be other one-page unswept spans which could be combined into a two-page span. + // It's critical to ensure that no operations proceed on unswept spans (that would corrupt + // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache, + // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it. + // When a goroutine explicitly frees an object or sets a finalizer, it ensures that + // the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish). + // The finalizer goroutine is kicked off only when all spans are swept. + // When the next GC starts, it sweeps all not-yet-swept spans (if any). + ++// GC rate. ++// Next GC is after we've allocated an extra amount of memory proportional to ++// the amount already in use. The proportion is controlled by GOGC environment variable ++// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M ++// (this mark is tracked in next_gc variable). This keeps the GC cost in linear ++// proportion to the allocation cost. Adjusting GOGC just changes the linear constant ++// (and also the amount of extra memory used). ++ + package runtime + + import "unsafe" + + const ( + _DebugGC = 0 + _DebugGCPtrs = false // if true, print trace of every pointer load during GC + _ConcurrentSweep = true + + _WorkbufSize = 4 * 1024 + _FinBlockSize = 4 * 1024 + _RootData = 0 + _RootBss = 1 + _RootFinalizers = 2 + _RootSpans = 3 + _RootFlushCaches = 4 + _RootCount = 5 + ) + + // ptrmask for an allocation containing a single pointer. + var oneptr = [...]uint8{bitsPointer} + -// Initialized from $GOGC. GOGC=off means no gc. ++// Initialized from $GOGC. GOGC=off means no GC. + var gcpercent int32 + + // Holding worldsema grants an M the right to try to stop the world. 
+ // The procedure is: + // + // semacquire(&worldsema); + // m.gcing = 1; + // stoptheworld(); + // + // ... do stuff ... + // + // m.gcing = 0; + // semrelease(&worldsema); + // starttheworld(); + // + var worldsema uint32 = 1 + ++// It is a bug if bits does not have bitBoundary set but ++// there are still some cases where this happens related ++// to stack spans. ++type markbits struct { ++ bitp *byte // pointer to the byte holding xbits ++ shift uintptr // bits xbits needs to be shifted to get bits ++ xbits byte // byte holding all the bits from *bitp ++ bits byte // mark and boundary bits relevant to corresponding slot. ++ tbits byte // pointer||scalar bits relevant to corresponding slot. ++} ++ + type workbuf struct { + node lfnode // must be first + nobj uintptr + obj [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr + } + + var data, edata, bss, ebss, gcdata, gcbss struct{} + + var finlock mutex // protects the following variables + var fing *g // goroutine that runs finalizers + var finq *finblock // list of finalizers that are to be executed + var finc *finblock // cache of free blocks + var finptrmask [_FinBlockSize / ptrSize / pointersPerByte]byte + var fingwait bool + var fingwake bool + var allfin *finblock // list of all blocks + + var gcdatamask bitvector + var gcbssmask bitvector + + var gclock mutex + + var badblock [1024]uintptr + var nbadblock int32 + + type workdata struct { + full uint64 // lock-free list of full blocks + empty uint64 // lock-free list of empty blocks ++ partial uint64 // lock-free list of partially filled blocks + pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait + nproc uint32 + tstart int64 + nwait uint32 + ndone uint32 + alldone note + markfor *parfor + + // Copy of mheap.allspans for marker or sweeper. + spans []*mspan + } + + var work workdata + + //go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal + var weak_cgo_allocate byte + + // Is _cgo_allocate linked into the binary? + func have_cgo_allocate() bool { + return &weak_cgo_allocate != nil + } + -// scanblock scans a block of n bytes starting at pointer b for references -// to other objects, scanning any it finds recursively until there are no -// unscanned objects left. Instead of using an explicit recursion, it keeps -// a work list in the Workbuf* structures and loops in the main function -// body. Keeping an explicit work list is easier on the stack allocator and -// more efficient. -func scanblock(b, n uintptr, ptrmask *uint8) { - // Cache memory arena parameters in local vars. - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - - wbuf := getempty(nil) - nobj := wbuf.nobj - wp := &wbuf.obj[nobj] - keepworking := b == 0 ++// To help debug the concurrent GC we remark with the world ++// stopped ensuring that any object encountered has their normal ++// mark bit set. To do this we use an orthogonal bit ++// pattern to indicate the object is marked. The following pattern ++// uses the upper two bits in the object's bounday nibble. ++// 01: scalar not marked ++// 10: pointer not marked ++// 11: pointer marked ++// 00: scalar marked ++// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. ++// The higher bit is 1 for pointers and 0 for scalars, whether the object ++// is marked or not. ++// The first nibble no longer holds the bitsDead pattern indicating that the ++// there are no more pointers in the object. This information is held ++// in the second nibble. 
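
Editor's note: the heap bitmap referred to throughout packs two words per byte. Within each 4-bit nibble the low two bits carry the boundary and mark bits, and the upper two bits carry the pointer/scalar type bits that the checkmark scheme reinterprets as the 00/01/10/11 patterns above. The decoder below mirrors what slottombits loads into a markbits; the concrete constant values are assumptions chosen to be consistent with the shifts and masks in this diff, not definitions copied from the runtime headers.

package main

import "fmt"

// Constant values chosen to match the shifts and masks used in the diff
// (gcBits = 4, two words per bitmap byte); they are assumptions of this
// sketch, not copied definitions.
const (
	gcBits      = 4
	bitBoundary = 1 << 0
	bitMarked   = 1 << 1
	bitMask     = bitBoundary | bitMarked
	bitPtrMask  = 3 << 2

	bitsScalar  = 1 // 01: scalar, not marked (checkmark encoding)
	bitsPointer = 2 // 10: pointer, not marked
)

// markbits mirrors the struct in the diff: everything decoded from one
// bitmap byte for a particular word slot.
type markbits struct {
	xbits byte // whole bitmap byte (two slots)
	shift uint // 0 for the first word in the byte, gcBits for the second
	bits  byte // boundary+mark bits for this slot
	tbits byte // pointer/scalar (type) bits for this slot
}

func decode(xbits byte, slot uint) markbits {
	m := markbits{xbits: xbits, shift: slot * gcBits}
	m.bits = (xbits >> m.shift) & bitMask
	m.tbits = ((xbits >> m.shift) & bitPtrMask) >> 2
	return m
}

func main() {
	// One byte describing two words: slot 0 is a marked pointer at an
	// object boundary, slot 1 is an unmarked scalar.
	b := byte(bitBoundary|bitMarked|bitsPointer<<2) | byte(bitsScalar<<2)<<gcBits

	for slot := uint(0); slot < 2; slot++ {
		m := decode(b, slot)
		fmt.Printf("slot %d: boundary=%v marked=%v pointer=%v\n",
			slot,
			m.bits&bitBoundary != 0,
			m.bits&bitMarked != 0,
			m.tbits == bitsPointer)
	}
}

Under checkmark, xoring a slot's upper bits with 01 flips it between marked and unmarked, which is the toggle docheckmark performs.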
++ ++// When marking an object if the bool checkmark is true one uses the above ++// encoding, otherwise one uses the bitMarked bit in the lower two bits ++// of the nibble. ++var ( ++ checkmark = false ++ gccheckmarkenable = true ++) + - var ptrbitp unsafe.Pointer ++// Is address b in the known heap. If it doesn't have a valid gcmap ++// returns false. For example pointers into stacks will return false. ++func inheap(b uintptr) bool { ++ if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { ++ return false ++ } ++ // Not a beginning of a block, consult span table to find the block beginning. ++ k := b >> _PageShift ++ x := k ++ x -= mheap_.arena_start >> _PageShift ++ s := h_spans[x] ++ if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse { ++ return false ++ } ++ return true ++} + - // ptrmask can have 2 possible values: - // 1. nil - obtain pointer mask from GC bitmap. - // 2. pointer to a compact mask (for stacks and data). - goto_scanobj := b != 0 ++// Given an address in the heap return the relevant byte from the gcmap. This routine ++// can be used on addresses to the start of an object or to the interior of the an object. ++func slottombits(obj uintptr, mbits *markbits) { ++ off := (obj&^(ptrSize-1) - mheap_.arena_start) / ptrSize ++ mbits.bitp = (*byte)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1)) ++ mbits.shift = off % wordsPerBitmapByte * gcBits ++ mbits.xbits = *mbits.bitp ++ mbits.bits = (mbits.xbits >> mbits.shift) & bitMask ++ mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2 ++} + ++// b is a pointer into the heap. ++// Find the start of the object refered to by b. ++// Set mbits to the associated bits from the bit map. ++// If b is not a valid heap object return nil and ++// undefined values in mbits. ++func objectstart(b uintptr, mbits *markbits) uintptr { ++ obj := b &^ (ptrSize - 1) + for { - if goto_scanobj { - goto_scanobj = false - } else { - if nobj == 0 { - // Out of work in workbuf. - if !keepworking { - putempty(wbuf) - return - } ++ slottombits(obj, mbits) ++ if mbits.bits&bitBoundary == bitBoundary { ++ break ++ } + - // Refill workbuf from global queue. - wbuf = getfull(wbuf) - if wbuf == nil { - return - } - nobj = wbuf.nobj - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] - } else { - wp = nil - } ++ // Not a beginning of a block, consult span table to find the block beginning. ++ k := b >> _PageShift ++ x := k ++ x -= mheap_.arena_start >> _PageShift ++ s := h_spans[x] ++ if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse { ++ if s != nil && s.state == _MSpanStack { ++ return 0 // This is legit. + } + - // If another proc wants a pointer, give it some. - if work.nwait > 0 && nobj > 4 && work.full == 0 { - wbuf.nobj = nobj - wbuf = handoff(wbuf) - nobj = wbuf.nobj - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] ++ // The following ensures that we are rigorous about what data ++ // structures hold valid pointers ++ if false { ++ // Still happens sometimes. We don't know why. 
++ printlock() ++ print("runtime:objectstart Span weird: obj=", hex(obj), " k=", hex(k)) ++ if s == nil { ++ print(" s=nil\n") + } else { - wp = nil ++ print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n") + } ++ printunlock() ++ gothrow("objectstart: bad pointer in unexpected span") + } - - nobj-- - wp = &wbuf.obj[nobj] - b = *wp - n = arena_used - uintptr(b) - ptrmask = nil // use GC bitmap for pointer info ++ return 0 + } + - if _DebugGCPtrs { - print("scanblock ", b, " +", hex(n), " ", ptrmask, "\n") ++ p := uintptr(s.start) << _PageShift ++ if s.sizeclass != 0 { ++ size := s.elemsize ++ idx := (obj - p) / size ++ p = p + idx*size + } - - // Find bits of the beginning of the object. - if ptrmask == nil { - off := (uintptr(b) - arena_start) / ptrSize - ptrbitp = unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1) ++ if p == obj { ++ print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", s.limit, "\n") ++ gothrow("failed to find block beginning") + } ++ obj = p ++ } + - var i uintptr - for i = 0; i < n; i += ptrSize { - // Find bits for this word. - var bits uintptr - if ptrmask == nil { - // Check if we have reached end of span. - if (uintptr(b)+i)%_PageSize == 0 && - h_spans[(uintptr(b)-arena_start)>>_PageShift] != h_spans[(uintptr(b)+i-arena_start)>>_PageShift] { - break - } ++ // if size(obj.firstfield) < PtrSize, the &obj.secondfield could map to the boundary bit ++ // Clear any low bits to get to the start of the object. ++ // greyobject depends on this. ++ return obj ++} + - // Consult GC bitmap. - bits = uintptr(*(*byte)(ptrbitp)) ++// Slow for now as we serialize this, since this is on a debug path ++// speed is not critical at this point. ++var andlock mutex + - if wordsPerBitmapByte != 2 { - gothrow("alg doesn't work for wordsPerBitmapByte != 2") - } - j := (uintptr(b) + i) / ptrSize & 1 - ptrbitp = add(ptrbitp, -j) - bits >>= gcBits * j ++func atomicand8(src *byte, val byte) { ++ lock(&andlock) ++ *src &= val ++ unlock(&andlock) ++} + - if bits&bitBoundary != 0 && i != 0 { - break // reached beginning of the next object - } - bits = (bits >> 2) & bitsMask - if bits == bitsDead { - break // reached no-scan part of the object - } - } else { - // dense mask (stack or data) - bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask - } ++// Mark using the checkmark scheme. ++func docheckmark(mbits *markbits) { ++ // xor 01 moves 01(scalar unmarked) to 00(scalar marked) ++ // and 10(pointer unmarked) to 11(pointer marked) ++ if mbits.tbits == _BitsScalar { ++ atomicand8(mbits.bitp, ^byte(_BitsCheckMarkXor<> mbits.shift) & bitMask ++ mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2 ++} + - if bits != _BitsPointer { - gothrow("unexpected garbage collection bits") - } ++// In the default scheme does mbits refer to a marked object. ++func ismarked(mbits *markbits) bool { ++ if mbits.bits&bitBoundary != bitBoundary { ++ gothrow("ismarked: bits should have boundary bit set") ++ } ++ return mbits.bits&bitMarked == bitMarked ++} + - obj := *(*uintptr)(unsafe.Pointer(b + i)) - obj0 := obj ++// In the checkmark scheme does mbits refer to a marked object. 
++func ischeckmarked(mbits *markbits) bool { ++ if mbits.bits&bitBoundary != bitBoundary { ++ gothrow("ischeckmarked: bits should have boundary bit set") ++ } ++ return mbits.tbits == _BitsScalarMarked || mbits.tbits == _BitsPointerMarked ++} + - markobj: - var s *mspan - var off, bitp, shift, xbits uintptr ++// When in GCmarkterminate phase we allocate black. ++func gcmarknewobject_m(obj uintptr) { ++ if gcphase != _GCmarktermination { ++ gothrow("marking new object while not in mark termination phase") ++ } ++ if checkmark { // The world should be stopped so this should not happen. ++ gothrow("gcmarknewobject called while doing checkmark") ++ } + - // At this point we have extracted the next potential pointer. - // Check if it points into heap. - if obj == 0 { - continue - } - if obj < arena_start || arena_used <= obj { - if uintptr(obj) < _PhysPageSize && invalidptr != 0 { - s = nil - goto badobj - } - continue - } ++ var mbits markbits ++ slottombits(obj, &mbits) ++ if mbits.bits&bitMarked != 0 { ++ return ++ } + - // Mark the object. - obj &^= ptrSize - 1 - off = (obj - arena_start) / ptrSize - bitp = arena_start - off/wordsPerBitmapByte - 1 - shift = (off % wordsPerBitmapByte) * gcBits - xbits = uintptr(*(*byte)(unsafe.Pointer(bitp))) - bits = (xbits >> shift) & bitMask - if (bits & bitBoundary) == 0 { - // Not a beginning of a block, consult span table to find the block beginning. - k := pageID(obj >> _PageShift) - x := k - x -= pageID(arena_start >> _PageShift) - s = h_spans[x] - if s == nil || k < s.start || s.limit <= obj || s.state != mSpanInUse { - // Stack pointers lie within the arena bounds but are not part of the GC heap. - // Ignore them. - if s != nil && s.state == _MSpanStack { - continue - } - goto badobj - } - p := uintptr(s.start) << _PageShift - if s.sizeclass != 0 { - size := s.elemsize - idx := (obj - p) / size - p = p + idx*size - } - if p == obj { - print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n") - gothrow("failed to find block beginning") ++ // Each byte of GC bitmap holds info for two words. ++ // If the current object is larger than two words, or if the object is one word ++ // but the object it shares the byte with is already marked, ++ // then all the possible concurrent updates are trying to set the same bit, ++ // so we can use a non-atomic update. ++ if mbits.xbits&(bitMask|(bitMask<bits=", hex(mbits.bits), " *mbits->bitp=", hex(*mbits.bitp), "\n") ++ ++ k := obj >> _PageShift ++ x := k ++ x -= mheap_.arena_start >> _PageShift ++ s := h_spans[x] ++ printlock() ++ print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k)) ++ if s == nil { ++ print(" s=nil\n") ++ } else { ++ print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n") ++ // NOTE(rsc): This code is using s.sizeclass as an approximation of the ++ // number of pointer-sized words in an object. Perhaps not what was intended. 
++ for i := 0; i < int(s.sizeclass); i++ { ++ print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n") + } - obj = p - goto markobj + } ++ gothrow("checkmark found unmarked object") ++ } ++ if ischeckmarked(mbits) { ++ return wbuf ++ } ++ docheckmark(mbits) ++ if !ischeckmarked(mbits) { ++ print("mbits xbits=", hex(mbits.xbits), " bits=", hex(mbits.bits), " tbits=", hex(mbits.tbits), " shift=", mbits.shift, "\n") ++ gothrow("docheckmark and ischeckmarked disagree") ++ } ++ } else { ++ // If marked we have nothing to do. ++ if mbits.bits&bitMarked != 0 { ++ return wbuf ++ } + - if _DebugGCPtrs { - print("scan *", hex(b+i), " = ", hex(obj0), " => base ", hex(obj), "\n") - } ++ // Each byte of GC bitmap holds info for two words. ++ // If the current object is larger than two words, or if the object is one word ++ // but the object it shares the byte with is already marked, ++ // then all the possible concurrent updates are trying to set the same bit, ++ // so we can use a non-atomic update. ++ if mbits.xbits&(bitMask|bitMask< 0 && obj == badblock[nbadblock-1] { - // Running garbage collection again because - // we want to find the path from a root to a bad pointer. - // Found possible next step; extend or finish path. - for j := int32(0); j < nbadblock; j++ { - if badblock[j] == b { - goto AlreadyBad - } - } - print("runtime: found *(", hex(b), "+", hex(i), ") = ", hex(obj0), "+", hex(obj-obj0), "\n") - if ptrmask != nil { - gothrow("bad pointer") - } - if nbadblock >= int32(len(badblock)) { - gothrow("badblock trace too long") - } - badblock[nbadblock] = uintptr(b) - nbadblock++ - AlreadyBad: ++ if !checkmark && (mbits.xbits>>(mbits.shift+2))&_BitsMask == _BitsDead { ++ return wbuf // noscan object ++ } ++ ++ // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but ++ // seems like a nice optimization that can be added back in. ++ // There needs to be time between the PREFETCH and the use. ++ // Previously we put the obj in an 8 element buffer that is drained at a rate ++ // to give the PREFETCH time to do its work. ++ // Use of PREFETCHNTA might be more appropriate than PREFETCH ++ ++ // If workbuf is full, obtain an empty one. ++ if wbuf.nobj >= uintptr(len(wbuf.obj)) { ++ wbuf = getempty(wbuf) ++ } ++ ++ wbuf.obj[wbuf.nobj] = obj ++ wbuf.nobj++ ++ return wbuf ++} ++ ++// Scan the object b of size n, adding pointers to wbuf. ++// Return possibly new wbuf to use. ++// If ptrmask != nil, it specifies where pointers are in b. ++// If ptrmask == nil, the GC bitmap should be consulted. ++// In this case, n may be an overestimate of the size; the GC bitmap ++// must also be used to make sure the scan stops at the end of b. ++func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf { ++ arena_start := mheap_.arena_start ++ arena_used := mheap_.arena_used ++ ++ // Find bits of the beginning of the object. ++ var ptrbitp unsafe.Pointer ++ var mbits markbits ++ if ptrmask == nil { ++ b = objectstart(b, &mbits) ++ if b == 0 { ++ return wbuf ++ } ++ ptrbitp = unsafe.Pointer(mbits.bitp) ++ } ++ for i := uintptr(0); i < n; i += ptrSize { ++ // Find bits for this word. ++ var bits uintptr ++ if ptrmask != nil { ++ // dense mask (stack or data) ++ bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask ++ } else { ++ // Check if we have reached end of span. ++ // n is an overestimate of the size of the object. 
++ if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] { ++ break + } + - // Now we have bits, bitp, and shift correct for - // obj pointing at the base of the object. - // Only care about not marked objects. - if bits&bitMarked != 0 { - continue ++ // Consult GC bitmap. ++ bits = uintptr(*(*byte)(ptrbitp)) ++ if wordsPerBitmapByte != 2 { ++ gothrow("alg doesn't work for wordsPerBitmapByte != 2") ++ } ++ j := (uintptr(b) + i) / ptrSize & 1 // j indicates upper nibble or lower nibble ++ bits >>= gcBits * j ++ if i == 0 { ++ bits &^= bitBoundary + } ++ ptrbitp = add(ptrbitp, -j) + - // If obj size is greater than 8, then each byte of GC bitmap - // contains info for at most one object. In such case we use - // non-atomic byte store to mark the object. This can lead - // to double enqueue of the object for scanning, but scanning - // is an idempotent operation, so it is OK. This cannot lead - // to bitmap corruption because the single marked bit is the - // only thing that can change in the byte. - // For 8-byte objects we use non-atomic store, if the other - // quadruple is already marked. Otherwise we resort to CAS - // loop for marking. - if xbits&(bitMask|bitMask<> 2 // bits refer to the type bits. + - if (xbits>>(shift+2))&bitsMask == bitsDead { - continue // noscan object ++ if i != 0 && bits == bitsDead { // BitsDead in first nibble not valid during checkmark ++ break // reached no-scan part of the object + } ++ } + - // Queue the obj for scanning. - // TODO: PREFETCH here. ++ if bits <= _BitsScalar { // _BitsScalar, _BitsDead, _BitsScalarMarked ++ continue ++ } + - // If workbuf is full, obtain an empty one. - if nobj >= uintptr(len(wbuf.obj)) { - wbuf.nobj = nobj - wbuf = getempty(wbuf) - nobj = wbuf.nobj - wp = &wbuf.obj[nobj] - } - *wp = obj - nobj++ - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] - } else { - wp = nil - } ++ if bits&_BitsPointer != _BitsPointer { ++ print("gc checkmark=", checkmark, " b=", hex(b), " ptrmask=", ptrmask, " mbits.bitp=", mbits.bitp, " mbits.xbits=", hex(mbits.xbits), " bits=", hex(bits), "\n") ++ gothrow("unexpected garbage collection bits") ++ } ++ ++ obj := *(*uintptr)(unsafe.Pointer(b + i)) ++ ++ // At this point we have extracted the next potential pointer. ++ // Check if it points into heap. ++ if obj == 0 || obj < arena_start || obj >= arena_used { + continue ++ } + - badobj: - // If cgo_allocate is linked into the binary, it can allocate - // memory as []unsafe.Pointer that may not contain actual - // pointers and must be scanned conservatively. - // In this case alone, allow the bad pointer. - if have_cgo_allocate() && ptrmask == nil { - continue - } ++ // Mark the object. return some important bits. ++ // We we combine the following two rotines we don't have to pass mbits or obj around. ++ var mbits markbits ++ obj = objectstart(obj, &mbits) ++ if obj == 0 { ++ continue ++ } ++ wbuf = greyobject(obj, &mbits, wbuf) ++ } ++ return wbuf ++} + - // Anything else indicates a bug somewhere. - // If we're in the middle of chasing down a different bad pointer, - // don't confuse the trace by printing about this one. - if nbadblock > 0 { - continue ++// scanblock starts by scanning b as scanobject would. ++// If the gcphase is GCscan, that's all scanblock does. ++// Otherwise it traverses some fraction of the pointers it found in b, recursively. ++// As a special case, scanblock(nil, 0, nil) means to scan previously queued work, ++// stopping only when no work is left in the system. 
++func scanblock(b, n uintptr, ptrmask *uint8) { ++ wbuf := getpartialorempty() ++ if b != 0 { ++ wbuf = scanobject(b, n, ptrmask, wbuf) ++ if gcphase == _GCscan { ++ if inheap(b) && ptrmask == nil { ++ // b is in heap, we are in GCscan so there should be a ptrmask. ++ gothrow("scanblock: In GCscan phase and inheap is true.") + } ++ // GCscan only goes one level deep since mark wb not turned on. ++ putpartial(wbuf) ++ return ++ } ++ } ++ if gcphase == _GCscan { ++ gothrow("scanblock: In GCscan phase but no b passed in.") ++ } + - print("runtime: garbage collector found invalid heap pointer *(", hex(b), "+", hex(i), ")=", hex(obj)) - if s == nil { - print(" s=nil\n") - } else { - print(" span=", uintptr(s.start)<<_PageShift, "-", s.limit, "-", (uintptr(s.start)+s.npages)<<_PageShift, " state=", s.state, "\n") ++ keepworking := b == 0 ++ ++ // ptrmask can have 2 possible values: ++ // 1. nil - obtain pointer mask from GC bitmap. ++ // 2. pointer to a compact mask (for stacks and data). ++ for { ++ if wbuf.nobj == 0 { ++ if !keepworking { ++ putempty(wbuf) ++ return + } - if ptrmask != nil { - gothrow("invalid heap pointer") ++ // Refill workbuf from global queue. ++ wbuf = getfull(wbuf) ++ if wbuf == nil { // nil means out of work barrier reached ++ return + } - // Add to badblock list, which will cause the garbage collection - // to keep repeating until it has traced the chain of pointers - // leading to obj all the way back to a root. - if nbadblock == 0 { - badblock[nbadblock] = uintptr(b) - nbadblock++ ++ ++ if wbuf.nobj <= 0 { ++ gothrow("runtime:scanblock getfull returns empty buffer") + } + } - if _DebugGCPtrs { - print("end scanblock ", hex(b), " +", hex(n), " ", ptrmask, "\n") - } - if _DebugGC > 0 && ptrmask == nil { - // For heap objects ensure that we did not overscan. - var p, n uintptr - if mlookup(b, &p, &n, nil) == 0 || b != p || i > n { - print("runtime: scanned (", hex(b), "+", hex(i), "), heap object (", hex(p), "+", hex(n), ")\n") - gothrow("scanblock: scanned invalid object") - } ++ ++ // If another proc wants a pointer, give it some. ++ if work.nwait > 0 && wbuf.nobj > 4 && work.full == 0 { ++ wbuf = handoff(wbuf) + } ++ ++ // This might be a good place to add prefetch code... ++ // if(wbuf->nobj > 4) { ++ // PREFETCH(wbuf->obj[wbuf->nobj - 3]; ++ // } ++ wbuf.nobj-- ++ b = wbuf.obj[wbuf.nobj] ++ wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf) + } + } + + func markroot(desc *parfor, i uint32) { + // Note: if you add a case here, please also update heapdump.c:dumproots. 
+ switch i { + case _RootData: + scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata) + + case _RootBss: + scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata) + + case _RootFinalizers: + for fb := allfin; fb != nil; fb = fb.alllink { + scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0]) + } + + case _RootSpans: + // mark MSpan.specials + sg := mheap_.sweepgen + for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ { + s := work.spans[spanidx] + if s.state != mSpanInUse { + continue + } - if s.sweepgen != sg { ++ if !checkmark && s.sweepgen != sg { ++ // sweepgen was updated (+2) during non-checkmark GC pass + print("sweep ", s.sweepgen, " ", sg, "\n") + gothrow("gc: unswept span") + } + for sp := s.specials; sp != nil; sp = sp.next { + if sp.kind != _KindSpecialFinalizer { + continue + } + // don't mark finalized object, but scan it so we + // retain everything it points to. + spf := (*specialfinalizer)(unsafe.Pointer(sp)) + // A finalizer can be set for an inner byte of an object, find object beginning. + p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize - scanblock(p, s.elemsize, nil) ++ if gcphase != _GCscan { ++ scanblock(p, s.elemsize, nil) // scanned during mark phase ++ } + scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0]) + } + } + + case _RootFlushCaches: - flushallmcaches() ++ if gcphase != _GCscan { // Do not flush mcaches during GCscan phase. ++ flushallmcaches() ++ } + + default: + // the rest is scanning goroutine stacks + if uintptr(i-_RootCount) >= allglen { + gothrow("markroot: bad index") + } + gp := allgs[i-_RootCount] ++ + // remember when we've first observed the G blocked + // needed only to output in traceback - status := readgstatus(gp) ++ status := readgstatus(gp) // We are not in a scan state + if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 { + gp.waitsince = work.tstart + } - // Shrink a stack if not much of it is being used. - shrinkstack(gp) ++ ++ // Shrink a stack if not much of it is being used but not in the scan phase. ++ if gcphase != _GCscan { // Do not shrink during GCscan phase. ++ shrinkstack(gp) ++ } + if readgstatus(gp) == _Gdead { + gp.gcworkdone = true + } else { + gp.gcworkdone = false + } + restart := stopg(gp) - scanstack(gp) ++ ++ // goroutine will scan its own stack when it stops running. ++ // Wait until it has. ++ for readgstatus(gp) == _Grunning && !gp.gcworkdone { ++ } ++ ++ // scanstack(gp) is done as part of gcphasework ++ // But to make sure we finished we need to make sure that ++ // the stack traps have all responded so drop into ++ // this while loop until they respond. ++ for !gp.gcworkdone { ++ status = readgstatus(gp) ++ if status == _Gdead { ++ gp.gcworkdone = true // scan is a noop ++ break ++ } ++ if status == _Gwaiting || status == _Grunnable { ++ restart = stopg(gp) ++ } ++ } + if restart { + restartg(gp) + } + } + } + + // Get an empty work buffer off the work.empty list, + // allocating new buffers as needed. 
+ func getempty(b *workbuf) *workbuf { - _g_ := getg() + if b != nil { - lfstackpush(&work.full, &b.node) ++ putfull(b) ++ b = nil + } - b = nil - c := _g_.m.mcache - if c.gcworkbuf != nil { - b = (*workbuf)(c.gcworkbuf) - c.gcworkbuf = nil - } - if b == nil { ++ if work.empty != 0 { + b = (*workbuf)(lfstackpop(&work.empty)) + } ++ if b != nil && b.nobj != 0 { ++ _g_ := getg() ++ print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n") ++ gothrow("getempty: workbuffer not empty, b->nobj not 0") ++ } + if b == nil { + b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys)) ++ b.nobj = 0 + } - b.nobj = 0 + return b + } + + func putempty(b *workbuf) { - _g_ := getg() - c := _g_.m.mcache - if c.gcworkbuf == nil { - c.gcworkbuf = (unsafe.Pointer)(b) - return ++ if b.nobj != 0 { ++ gothrow("putempty: b->nobj not 0") + } + lfstackpush(&work.empty, &b.node) + } + -func gcworkbuffree(b unsafe.Pointer) { - if b != nil { - putempty((*workbuf)(b)) ++func putfull(b *workbuf) { ++ if b.nobj <= 0 { ++ gothrow("putfull: b->nobj <= 0") ++ } ++ lfstackpush(&work.full, &b.node) ++} ++ ++// Get an partially empty work buffer ++// if none are available get an empty one. ++func getpartialorempty() *workbuf { ++ b := (*workbuf)(lfstackpop(&work.partial)) ++ if b == nil { ++ b = getempty(nil) + } ++ return b + } + -// Get a full work buffer off the work.full list, or return nil. ++func putpartial(b *workbuf) { ++ if b.nobj == 0 { ++ lfstackpush(&work.empty, &b.node) ++ } else if b.nobj < uintptr(len(b.obj)) { ++ lfstackpush(&work.partial, &b.node) ++ } else if b.nobj == uintptr(len(b.obj)) { ++ lfstackpush(&work.full, &b.node) ++ } else { ++ print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n") ++ gothrow("putpartial: bad Workbuf b.nobj") ++ } ++} ++ ++// Get a full work buffer off the work.full or a partially ++// filled one off the work.partial list. If nothing is available ++// wait until all the other gc helpers have finished and then ++// return nil. ++// getfull acts as a barrier for work.nproc helpers. As long as one ++// gchelper is actively marking objects it ++// may create a workbuffer that the other helpers can work on. ++// The for loop either exits when a work buffer is found ++// or when _all_ of the work.nproc GC helpers are in the loop ++// looking for work and thus not capable of creating new work. ++// This is in fact the termination condition for the STW mark ++// phase. + func getfull(b *workbuf) *workbuf { + if b != nil { - lfstackpush(&work.empty, &b.node) ++ putempty(b) + } ++ + b = (*workbuf)(lfstackpop(&work.full)) ++ if b == nil { ++ b = (*workbuf)(lfstackpop(&work.partial)) ++ } + if b != nil || work.nproc == 1 { + return b + } + + xadd(&work.nwait, +1) + for i := 0; ; i++ { + if work.full != 0 { + xadd(&work.nwait, -1) + b = (*workbuf)(lfstackpop(&work.full)) ++ if b == nil { ++ b = (*workbuf)(lfstackpop(&work.partial)) ++ } + if b != nil { + return b + } + xadd(&work.nwait, +1) + } + if work.nwait == work.nproc { + return nil + } + _g_ := getg() + if i < 10 { + _g_.m.gcstats.nprocyield++ + procyield(20) + } else if i < 20 { + _g_.m.gcstats.nosyield++ + osyield() + } else { + _g_.m.gcstats.nsleep++ + usleep(100) + } + } + } + + func handoff(b *workbuf) *workbuf { + // Make new buffer with half of b's pointers. 
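
The new empty/partial/full split is mostly bookkeeping: a buffer is routed by how many objects it holds, and getfull doubles as the termination barrier once every helper is waiting. A minimal sketch of just the routing rule in putpartial, with plain slices (emptyList, partialList, fullList are illustrative stand-ins for the lock-free lists):

package main

import "fmt"

// buf mirrors the occupancy bookkeeping of a workbuf; the three slices
// below are illustrative stand-ins for work.empty, work.partial and
// work.full.
type buf struct {
	obj  [512]uintptr
	nobj int
}

var emptyList, partialList, fullList []*buf

// putpartialSketch routes a buffer exactly as putpartial above does:
// by how full it is, throwing on an impossible count.
func putpartialSketch(b *buf) {
	switch {
	case b.nobj == 0:
		emptyList = append(emptyList, b)
	case b.nobj < len(b.obj):
		partialList = append(partialList, b)
	case b.nobj == len(b.obj):
		fullList = append(fullList, b)
	default:
		panic("putpartial: bad workbuf occupancy")
	}
}

func main() {
	putpartialSketch(&buf{})          // empty
	putpartialSketch(&buf{nobj: 10})  // partial
	putpartialSketch(&buf{nobj: 512}) // full
	fmt.Println(len(emptyList), len(partialList), len(fullList)) // 1 1 1
}
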
+ b1 := getempty(nil) + n := b.nobj / 2 + b.nobj -= n + b1.nobj = n + memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0])) + _g_ := getg() + _g_.m.gcstats.nhandoff++ + _g_.m.gcstats.nhandoffcnt += uint64(n) + + // Put b on full list - let first half of b get stolen. + lfstackpush(&work.full, &b.node) + return b1 + } + + func stackmapdata(stkmap *stackmap, n int32) bitvector { + if n < 0 || n >= stkmap.n { + gothrow("stackmapdata: index out of range") + } + return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))} + } + + // Scan a stack frame: local variables and function arguments/results. + func scanframe(frame *stkframe, unused unsafe.Pointer) bool { + + f := frame.fn + targetpc := frame.continpc + if targetpc == 0 { + // Frame is dead. + return true + } + if _DebugGC > 1 { + print("scanframe ", gofuncname(f), "\n") + } + if targetpc != f.entry { + targetpc-- + } + pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc) + if pcdata == -1 { + // We do not have a valid pcdata value but there might be a + // stackmap for this function. It is likely that we are looking + // at the function prologue, assume so and hope for the best. + pcdata = 0 + } + + // Scan local variables if stack frame has been allocated. + size := frame.varp - frame.sp + var minsize uintptr + if thechar != '6' && thechar != '8' { + minsize = ptrSize + } else { + minsize = 0 + } + if size > minsize { + stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) + if stkmap == nil || stkmap.n <= 0 { + print("runtime: frame ", gofuncname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n") + gothrow("missing stackmap") + } + + // Locals bitmap information, scan just the pointers in locals. + if pcdata < 0 || pcdata >= stkmap.n { + // don't know where we are + print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n") + gothrow("scanframe: bad symbol table") + } + bv := stackmapdata(stkmap, pcdata) + size = (uintptr(bv.n) * ptrSize) / bitsPerPointer + scanblock(frame.varp-size, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata) + } + + // Scan arguments. + if frame.arglen > 0 { + var bv bitvector + if frame.argmap != nil { + bv = *frame.argmap + } else { + stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps)) + if stkmap == nil || stkmap.n <= 0 { + print("runtime: frame ", gofuncname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n") + gothrow("missing stackmap") + } + if pcdata < 0 || pcdata >= stkmap.n { + // don't know where we are + print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n") + gothrow("scanframe: bad symbol table") + } + bv = stackmapdata(stkmap, pcdata) + } + scanblock(frame.argp, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata) + } + return true + } + + func scanstack(gp *g) { + // TODO(rsc): Due to a precedence error, this was never checked in the original C version. + // If you enable the check, the gothrow happens. 
+ /* + if readgstatus(gp)&_Gscan == 0 { + print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") + gothrow("mark - bad status") + } + */ + + switch readgstatus(gp) &^ _Gscan { + default: + print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") + gothrow("mark - bad status") + case _Gdead: + return + case _Grunning: + print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") - gothrow("mark - world not stopped") ++ gothrow("scanstack: goroutine not stopped") + case _Grunnable, _Gsyscall, _Gwaiting: + // ok + } + + if gp == getg() { + gothrow("can't scan our own stack") + } + mp := gp.m + if mp != nil && mp.helpgc != 0 { + gothrow("can't scan gchelper stack") + } + + gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) + tracebackdefers(gp, scanframe, nil) + } + -// The gp has been moved to a gc safepoint. If there is gcphase specific -// work it is done here. ++// If the slot is grey or black return true, if white return false. ++// If the slot is not in the known heap and thus does not have a valid GC bitmap then ++// it is considered grey. Globals and stacks can hold such slots. ++// The slot is grey if its mark bit is set and it is enqueued to be scanned. ++// The slot is black if it has already been scanned. ++// It is white if it has a valid mark bit and the bit is not set. ++func shaded(slot uintptr) bool { ++ if !inheap(slot) { // non-heap slots considered grey ++ return true ++ } ++ ++ var mbits markbits ++ valid := objectstart(slot, &mbits) ++ if valid == 0 { ++ return true ++ } ++ ++ if checkmark { ++ return ischeckmarked(&mbits) ++ } ++ ++ return mbits.bits&bitMarked != 0 ++} ++ ++// Shade the object if it isn't already. ++// The object is not nil and known to be in the heap. ++func shade(b uintptr) { ++ if !inheap(b) { ++ gothrow("shade: passed an address not in the heap") ++ } ++ ++ wbuf := getpartialorempty() ++ // Mark the object, return some important bits. ++ // If we combine the following two rotines we don't have to pass mbits or obj around. ++ var mbits markbits ++ obj := objectstart(b, &mbits) ++ if obj != 0 { ++ wbuf = greyobject(obj, &mbits, wbuf) // augments the wbuf ++ } ++ putpartial(wbuf) ++} ++ ++// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object. ++// The original Dijkstra barrier only shaded ptrs being placed in black slots. ++// ++// Shade indicates that it has seen a white pointer by adding the referent ++// to wbuf as well as marking it. ++// ++// slot is the destination (dst) in go code ++// ptr is the value that goes into the slot (src) in the go code ++// ++// Dijkstra pointed out that maintaining the no black to white ++// pointers means that white to white pointers not need ++// to be noted by the write barrier. Furthermore if either ++// white object dies before it is reached by the ++// GC then the object can be collected during this GC cycle ++// instead of waiting for the next cycle. Unfortunately the cost of ++// ensure that the object holding the slot doesn't concurrently ++// change to black without the mutator noticing seems prohibitive. ++// ++// Consider the following example where the mutator writes into ++// a slot and then loads the slot's mark bit while the GC thread ++// writes to the slot's mark bit and then as part of scanning reads ++// the slot. 
++// ++// Initially both [slot] and [slotmark] are 0 (nil) ++// Mutator thread GC thread ++// st [slot], ptr st [slotmark], 1 ++// ++// ld r1, [slotmark] ld r2, [slot] ++// ++// This is a classic example of independent reads of independent writes, ++// aka IRIW. The question is if r1==r2==0 is allowed and for most HW the ++// answer is yes without inserting a memory barriers between the st and the ld. ++// These barriers are expensive so we have decided that we will ++// always grey the ptr object regardless of the slot's color. ++func gcmarkwb_m(slot *uintptr, ptr uintptr) { ++ switch gcphase { ++ default: ++ gothrow("gcphasework in bad gcphase") ++ ++ case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan: ++ // ok ++ ++ case _GCmark, _GCmarktermination: ++ if ptr != 0 && inheap(ptr) { ++ shade(ptr) ++ } ++ } ++} ++ ++// The gp has been moved to a GC safepoint. GC phase specific ++// work is done here. + func gcphasework(gp *g) { + switch gcphase { + default: + gothrow("gcphasework in bad gcphase") + case _GCoff, _GCquiesce, _GCstw, _GCsweep: - // No work for now. ++ // No work. ++ case _GCscan: ++ // scan the stack, mark the objects, put pointers in work buffers ++ // hanging off the P where this is being run. ++ scanstack(gp) + case _GCmark: - // Disabled until concurrent GC is implemented - // but indicate the scan has been done. - // scanstack(gp); ++ // No work. ++ case _GCmarktermination: ++ scanstack(gp) ++ // All available mark work will be emptied before returning. + } + gp.gcworkdone = true + } + + var finalizer1 = [...]byte{ + // Each Finalizer is 5 words, ptr ptr uintptr ptr ptr. + // Each byte describes 4 words. + // Need 4 Finalizers described by 5 bytes before pattern repeats: + // ptr ptr uintptr ptr ptr + // ptr ptr uintptr ptr ptr + // ptr ptr uintptr ptr ptr + // ptr ptr uintptr ptr ptr + // aka + // ptr ptr uintptr ptr + // ptr ptr ptr uintptr + // ptr ptr ptr ptr + // uintptr ptr ptr ptr + // ptr uintptr ptr ptr + // Assumptions about Finalizer layout checked below. + bitsPointer | bitsPointer<<2 | bitsScalar<<4 | bitsPointer<<6, + bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsScalar<<6, + bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6, + bitsScalar | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6, + bitsPointer | bitsScalar<<2 | bitsPointer<<4 | bitsPointer<<6, + } + + func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) { + lock(&finlock) + if finq == nil || finq.cnt == finq.cap { + if finc == nil { + finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys)) + finc.cap = int32((_FinBlockSize-unsafe.Sizeof(finblock{}))/unsafe.Sizeof(finalizer{}) + 1) + finc.alllink = allfin + allfin = finc + if finptrmask[0] == 0 { + // Build pointer mask for Finalizer array in block. + // Check assumptions made in finalizer1 array above. 
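
Concretely, the coarsened Dijkstra barrier above reduces to: on every pointer store during the mark phases, grey the pointee, no matter what colour the destination object is. A toy tricolour version with an explicit grey queue instead of mark bits and workbufs (object, shade, writePointer are illustrative names, not runtime API):

package main

import "fmt"

type color int

const (
	white color = iota // not yet reached by this GC
	grey               // reached, fields not yet scanned
	black              // reached and scanned
)

type object struct {
	color color
	next  *object
}

// greyQueue stands in for the mark work buffers.
var greyQueue []*object

// shade greys a white object and queues it for scanning, mirroring
// what shade/greyobject above do with mark bits and workbufs.
func shade(o *object) {
	if o != nil && o.color == white {
		o.color = grey
		greyQueue = append(greyQueue, o)
	}
}

// writePointer is the coarsened Dijkstra barrier from gcmarkwb_m:
// during marking, always shade the pointer being stored, regardless
// of the colour of the object that holds the slot.
func writePointer(slot **object, ptr *object, marking bool) {
	*slot = ptr
	if marking {
		shade(ptr)
	}
}

func main() {
	a := &object{color: black}
	b := &object{} // white object about to be stored into a black one
	writePointer(&a.next, b, true)
	fmt.Println(b.color == grey, len(greyQueue)) // true 1
}
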
+ if (unsafe.Sizeof(finalizer{}) != 5*ptrSize || + unsafe.Offsetof(finalizer{}.fn) != 0 || + unsafe.Offsetof(finalizer{}.arg) != ptrSize || + unsafe.Offsetof(finalizer{}.nret) != 2*ptrSize || + unsafe.Offsetof(finalizer{}.fint) != 3*ptrSize || + unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize || + bitsPerPointer != 2) { + gothrow("finalizer out of sync") + } + for i := range finptrmask { + finptrmask[i] = finalizer1[i%len(finalizer1)] + } + } + } + block := finc + finc = block.next + block.next = finq + finq = block + } + f := (*finalizer)(add(unsafe.Pointer(&finq.fin[0]), uintptr(finq.cnt)*unsafe.Sizeof(finq.fin[0]))) + finq.cnt++ + f.fn = fn + f.nret = nret + f.fint = fint + f.ot = ot + f.arg = p + fingwake = true + unlock(&finlock) + } + + func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrtype)) { + for fb := allfin; fb != nil; fb = fb.alllink { + for i := int32(0); i < fb.cnt; i++ { + f := &fb.fin[i] + callback(f.fn, f.arg, f.nret, f.fint, f.ot) + } + } + } + ++// Returns only when span s has been swept. + func mSpan_EnsureSwept(s *mspan) { + // Caller must disable preemption. + // Otherwise when this function returns the span can become unswept again + // (if GC is triggered on another goroutine). + _g_ := getg() + if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { + gothrow("MSpan_EnsureSwept: m is not locked") + } + + sg := mheap_.sweepgen + if atomicload(&s.sweepgen) == sg { + return + } ++ // The caller must be sure that the span is a MSpanInUse span. + if cas(&s.sweepgen, sg-2, sg-1) { + mSpan_Sweep(s, false) + return + } + // unfortunate condition, and we don't have efficient means to wait + for atomicload(&s.sweepgen) != sg { + osyield() + } + } + + // Sweep frees or collects finalizers for blocks not marked in the mark phase. + // It clears the mark bits in preparation for the next GC round. + // Returns true if the span was returned to heap. + // If preserve=true, don't return it to heap nor relink in MCentral lists; + // caller takes care of it. + func mSpan_Sweep(s *mspan, preserve bool) bool { ++ if checkmark { ++ gothrow("MSpan_Sweep: checkmark only runs in STW and after the sweep") ++ } ++ + // It's critical that we enter this function with preemption disabled, + // GC must not start while we are in the middle of this function. + _g_ := getg() + if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { + gothrow("MSpan_Sweep: m is not locked") + } + sweepgen := mheap_.sweepgen + if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + gothrow("MSpan_Sweep: bad span state") + } + arena_start := mheap_.arena_start + cl := s.sizeclass + size := s.elemsize + var n int32 + var npages int32 + if cl == 0 { + n = 1 + } else { + // Chunk full of small blocks. + npages = class_to_allocnpages[cl] + n = (npages << _PageShift) / int32(size) + } + res := false + nfree := 0 + var head mlink + end := &head + c := _g_.m.mcache + sweepgenset := false + + // Mark any free objects in this span so we don't collect them. + for link := s.freelist; link != nil; link = link.next { + off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize + bitp := arena_start - off/wordsPerBitmapByte - 1 + shift := (off % wordsPerBitmapByte) * gcBits + *(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift + } + + // Unlink & free special records for any objects we're about to free. 
+ specialp := &s.specials + special := *specialp + for special != nil { + // A finalizer can be set for an inner byte of an object, find object beginning. + p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size + off := (p - arena_start) / ptrSize + bitp := arena_start - off/wordsPerBitmapByte - 1 + shift := (off % wordsPerBitmapByte) * gcBits + bits := (*(*byte)(unsafe.Pointer(bitp)) >> shift) & bitMask + if bits&bitMarked == 0 { + // Find the exact byte for which the special was setup + // (as opposed to object beginning). + p := uintptr(s.start<<_PageShift) + uintptr(special.offset) + // about to free object: splice out special record + y := special + special = special.next + *specialp = special + if !freespecial(y, unsafe.Pointer(p), size, false) { + // stop freeing of object if it has a finalizer + *(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift + } + } else { + // object is still live: keep special record + specialp = &special.next + special = *specialp + } + } + + // Sweep through n objects of given size starting at p. + // This thread owns the span now, so it can manipulate + // the block bitmap without atomic operations. + p := uintptr(s.start << _PageShift) + off := (p - arena_start) / ptrSize + bitp := arena_start - off/wordsPerBitmapByte - 1 + shift := uint(0) + step := size / (ptrSize * wordsPerBitmapByte) + // Rewind to the previous quadruple as we move to the next + // in the beginning of the loop. + bitp += step + if step == 0 { + // 8-byte objects. + bitp++ + shift = gcBits + } + for ; n > 0; n, p = n-1, p+size { + bitp -= step + if step == 0 { + if shift != 0 { + bitp-- + } + shift = gcBits - shift + } + + xbits := *(*byte)(unsafe.Pointer(bitp)) + bits := (xbits >> shift) & bitMask + + // Allocated and marked object, reset bits to allocated. + if bits&bitMarked != 0 { + *(*byte)(unsafe.Pointer(bitp)) &^= bitMarked << shift + continue + } + + // At this point we know that we are looking at garbage object + // that needs to be collected. + if debug.allocfreetrace != 0 { + tracefree(unsafe.Pointer(p), size) + } + + // Reset to allocated+noscan. + *(*byte)(unsafe.Pointer(bitp)) = uint8(uintptr(xbits&^((bitMarked|bitsMask<<2)< 0 { + s.limit = 0 // prevent mlookup from finding this span + sysFault(unsafe.Pointer(p), size) + } else { + mHeap_Free(&mheap_, s, 1) + } + c.local_nlargefree++ + c.local_largefree += size + xadd64(&memstats.next_gc, -int64(size)*int64(gcpercent+100)/100) + res = true + } else { + // Free small object. + if size > 2*ptrSize { + *(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed" + } else if size > ptrSize { + *(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0 + } + end.next = (*mlink)(unsafe.Pointer(p)) + end = end.next + nfree++ + } + } + + // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, + // because of the potential for a concurrent free/SetFinalizer. + // But we need to set it before we make the span available for allocation + // (return it to heap or mcentral), because allocation code assumes that a + // span is already swept if available for allocation. + if !sweepgenset && nfree == 0 { + // The span must be in our exclusive ownership until we update sweepgen, + // check for potential races. 
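
Stripped of the bitmap arithmetic and the finalizer special cases, the per-span sweep above is: clear the mark on every marked object so it starts the next cycle white, and put every unmarked object on the free list. A simplified sketch over a boolean mark array:

package main

import "fmt"

// sweepSketch frees every unmarked slot and clears the mark on the rest,
// returning the indices of freed slots. marks stands in for the
// per-object mark bits kept in the heap bitmap.
func sweepSketch(marks []bool) (freed []int) {
	for i := range marks {
		if marks[i] {
			marks[i] = false // survivor: reset for the next GC cycle
			continue
		}
		freed = append(freed, i) // garbage: would go on the span free list
	}
	return freed
}

func main() {
	marks := []bool{true, false, true, false, false}
	fmt.Println(sweepSketch(marks)) // [1 3 4]
	fmt.Println(marks)              // [false false false false false]
}
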
+ if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + gothrow("MSpan_Sweep: bad span state after sweep") + } + atomicstore(&s.sweepgen, sweepgen) + } + if nfree > 0 { + c.local_nsmallfree[cl] += uintptr(nfree) + c.local_cachealloc -= intptr(uintptr(nfree) * size) + xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100) + res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve) + // MCentral_FreeSpan updates sweepgen + } + return res + } + + // State of background sweep. + // Protected by gclock. + type sweepdata struct { + g *g + parked bool + started bool + + spanidx uint32 // background sweeper position + + nbgsweep uint32 + npausesweep uint32 + } + + var sweep sweepdata + + // sweeps one span + // returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep + func sweepone() uintptr { + _g_ := getg() + + // increment locks to ensure that the goroutine is not preempted + // in the middle of sweep thus leaving the span in an inconsistent state for next GC + _g_.m.locks++ + sg := mheap_.sweepgen + for { + idx := xadd(&sweep.spanidx, 1) - 1 + if idx >= uint32(len(work.spans)) { + mheap_.sweepdone = 1 + _g_.m.locks-- + return ^uintptr(0) + } + s := work.spans[idx] + if s.state != mSpanInUse { + s.sweepgen = sg + continue + } + if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) { + continue + } + npages := s.npages + if !mSpan_Sweep(s, false) { + npages = 0 + } + _g_.m.locks-- + return npages + } + } + + func gosweepone() uintptr { + var ret uintptr + systemstack(func() { + ret = sweepone() + }) + return ret + } + + func gosweepdone() bool { + return mheap_.sweepdone != 0 + } + + func gchelper() { + _g_ := getg() + _g_.m.traceback = 2 + gchelperstart() + - // parallel mark for over gc roots ++ // parallel mark for over GC roots + parfordo(work.markfor) - - // help other threads scan secondary blocks - scanblock(0, 0, nil) ++ if gcphase != _GCscan { ++ scanblock(0, 0, nil) // blocks in getfull ++ } + + nproc := work.nproc // work.nproc can change right after we increment work.ndone + if xadd(&work.ndone, +1) == nproc-1 { + notewakeup(&work.alldone) + } + _g_.m.traceback = 0 + } + + func cachestats() { + for i := 0; ; i++ { + p := allp[i] + if p == nil { + break + } + c := p.mcache + if c == nil { + continue + } + purgecachedstats(c) + } + } + + func flushallmcaches() { + for i := 0; ; i++ { + p := allp[i] + if p == nil { + break + } + c := p.mcache + if c == nil { + continue + } + mCache_ReleaseAll(c) + stackcache_clear(c) + } + } + + func updatememstats(stats *gcstats) { + if stats != nil { + *stats = gcstats{} + } + for mp := allm; mp != nil; mp = mp.alllink { + if stats != nil { + src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats)) + dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats)) + for i, v := range src { + dst[i] += v + } + mp.gcstats = gcstats{} + } + } + + memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse) + memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse) + memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys + + memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys + + // Calculate memory allocator stats. + // During program execution we only count number of frees and amount of freed memory. 
+ // Current number of alive object in the heap and amount of alive heap memory + // are calculated by scanning all spans. + // Total number of mallocs is calculated as number of frees plus number of alive objects. + // Similarly, total amount of allocated memory is calculated as amount of freed memory + // plus amount of alive heap memory. + memstats.alloc = 0 + memstats.total_alloc = 0 + memstats.nmalloc = 0 + memstats.nfree = 0 + for i := 0; i < len(memstats.by_size); i++ { + memstats.by_size[i].nmalloc = 0 + memstats.by_size[i].nfree = 0 + } + + // Flush MCache's to MCentral. + systemstack(flushallmcaches) + + // Aggregate local stats. + cachestats() + + // Scan all spans and count number of alive objects. + lock(&mheap_.lock) + for i := uint32(0); i < mheap_.nspan; i++ { + s := h_allspans[i] + if s.state != mSpanInUse { + continue + } + if s.sizeclass == 0 { + memstats.nmalloc++ + memstats.alloc += uint64(s.elemsize) + } else { + memstats.nmalloc += uint64(s.ref) + memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref) + memstats.alloc += uint64(s.ref) * uint64(s.elemsize) + } + } + unlock(&mheap_.lock) + + // Aggregate by size class. + smallfree := uint64(0) + memstats.nfree = mheap_.nlargefree + for i := 0; i < len(memstats.by_size); i++ { + memstats.nfree += mheap_.nsmallfree[i] + memstats.by_size[i].nfree = mheap_.nsmallfree[i] + memstats.by_size[i].nmalloc += mheap_.nsmallfree[i] + smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i]) + } + memstats.nfree += memstats.tinyallocs + memstats.nmalloc += memstats.nfree + + // Calculate derived stats. + memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree + memstats.heap_alloc = memstats.alloc + memstats.heap_objects = memstats.nmalloc - memstats.nfree + } + + func gcinit() { + if unsafe.Sizeof(workbuf{}) != _WorkbufSize { + gothrow("runtime: size of Workbuf is suboptimal") + } + + work.markfor = parforalloc(_MaxGcproc) + gcpercent = readgogc() + gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data))) + gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss))) + } + ++// Called from malloc.go using onM, stopping and starting the world handled in caller. + func gc_m(start_time int64, eagersweep bool) { + _g_ := getg() + gp := _g_.m.curg + casgstatus(gp, _Grunning, _Gwaiting) + gp.waitreason = "garbage collection" + + gc(start_time, eagersweep) ++ casgstatus(gp, _Gwaiting, _Grunning) ++} ++ ++// Similar to clearcheckmarkbits but works on a single span. ++// It preforms two tasks. ++// 1. When used before the checkmark phase it converts BitsDead (00) to bitsScalar (01) ++// for nibbles with the BoundaryBit set. ++// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer 10 and ++// BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding. ++// For the second case it is possible to restore the BitsDead pattern but since ++// clearmark is a debug tool performance has a lower priority than simplicity. ++// The span is MSpanInUse and the world is stopped. ++func clearcheckmarkbitsspan(s *mspan) { ++ if s.state != _MSpanInUse { ++ print("runtime:clearcheckmarkbitsspan: state=", s.state, "\n") ++ gothrow("clearcheckmarkbitsspan: bad span state") ++ } + - if nbadblock > 0 { - // Work out path from root to bad block. 
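
The accounting comment in updatememstats above amounts to a simple identity: the allocator only records frees while the program runs, so mallocs and total allocation are reconstructed from the frees plus whatever is currently live in the in-use spans. A small sketch of that arithmetic (liveStats and freeStats are illustrative types, not runtime structures):

package main

import "fmt"

// liveStats is what scanning the in-use spans yields; freeStats is what
// the allocator accumulated while running.
type liveStats struct{ objects, bytes uint64 }
type freeStats struct{ nfree, bytes uint64 }

// derive reconstructs the totals the way updatememstats does:
// mallocs = frees + live objects, total allocation = freed + live bytes.
func derive(live liveStats, freed freeStats) (nmalloc, totalAlloc uint64) {
	nmalloc = freed.nfree + live.objects
	totalAlloc = freed.bytes + live.bytes
	return
}

func main() {
	n, t := derive(liveStats{objects: 100, bytes: 4096}, freeStats{nfree: 40, bytes: 2048})
	fmt.Println(n, t) // 140 6144
}
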
- for { - gc(start_time, eagersweep) - if nbadblock >= int32(len(badblock)) { - gothrow("cannot find path to bad pointer") ++ arena_start := mheap_.arena_start ++ cl := s.sizeclass ++ size := s.elemsize ++ var n int32 ++ if cl == 0 { ++ n = 1 ++ } else { ++ // Chunk full of small blocks ++ npages := class_to_allocnpages[cl] ++ n = npages << _PageShift / int32(size) ++ } ++ ++ // MSpan_Sweep has similar code but instead of overloading and ++ // complicating that routine we do a simpler walk here. ++ // Sweep through n objects of given size starting at p. ++ // This thread owns the span now, so it can manipulate ++ // the block bitmap without atomic operations. ++ p := uintptr(s.start) << _PageShift ++ ++ // Find bits for the beginning of the span. ++ off := (p - arena_start) / ptrSize ++ bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)) ++ step := size / (ptrSize * wordsPerBitmapByte) ++ ++ // The type bit values are: ++ // 00 - BitsDead, for us BitsScalarMarked ++ // 01 - BitsScalar ++ // 10 - BitsPointer ++ // 11 - unused, for us BitsPointerMarked ++ // ++ // When called to prepare for the checkmark phase (checkmark==1), ++ // we change BitsDead to BitsScalar, so that there are no BitsScalarMarked ++ // type bits anywhere. ++ // ++ // The checkmark phase marks by changing BitsScalar to BitsScalarMarked ++ // and BitsPointer to BitsPointerMarked. ++ // ++ // When called to clean up after the checkmark phase (checkmark==0), ++ // we unmark by changing BitsScalarMarked back to BitsScalar and ++ // BitsPointerMarked back to BitsPointer. ++ // ++ // There are two problems with the scheme as just described. ++ // First, the setup rewrites BitsDead to BitsScalar, but the type bits ++ // following a BitsDead are uninitialized and must not be used. ++ // Second, objects that are free are expected to have their type ++ // bits zeroed (BitsDead), so in the cleanup we need to restore ++ // any BitsDeads that were there originally. ++ // ++ // In a one-word object (8-byte allocation on 64-bit system), ++ // there is no difference between BitsScalar and BitsDead, because ++ // neither is a pointer and there are no more words in the object, ++ // so using BitsScalar during the checkmark is safe and mapping ++ // both back to BitsDead during cleanup is also safe. ++ // ++ // In a larger object, we need to be more careful. During setup, ++ // if the type of the first word is BitsDead, we change it to BitsScalar ++ // (as we must) but also initialize the type of the second ++ // word to BitsDead, so that a scan during the checkmark phase ++ // will still stop before seeing the uninitialized type bits in the ++ // rest of the object. The sequence 'BitsScalar BitsDead' never ++ // happens in real type bitmaps - BitsDead is always as early ++ // as possible, so immediately after the last BitsPointer. ++ // During cleanup, if we see a BitsScalar, we can check to see if it ++ // is followed by BitsDead. If so, it was originally BitsDead and ++ // we can change it back. 
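
The encoding juggling described in the comment above is easier to see with the four 2-bit values written out. The XOR constant is not visible in this hunk; the value 1 below is inferred from the transitions the comment lists (Scalar 01 <-> ScalarMarked 00 and Pointer 10 <-> PointerMarked 11 are one low-bit flip apart), so treat it and the names as illustrative:

package main

import "fmt"

// Two-bit type encodings from the comment above, plus the checkmark
// variants they map to.
const (
	bitsDead          = 0 // doubles as BitsScalarMarked during checkmark
	bitsScalar        = 1
	bitsPointer       = 2
	bitsPointerMarked = 3 // the otherwise-unused 11 pattern

	bitsCheckMarkXor = 1 // flipping the low type bit toggles marked/unmarked
)

// checkmark marks a nibble's type bits: Scalar(01)->ScalarMarked(00),
// Pointer(10)->PointerMarked(11).
func checkmark(bits uint8) uint8 { return bits ^ bitsCheckMarkXor }

func main() {
	fmt.Println(checkmark(bitsScalar) == bitsDead)              // true
	fmt.Println(checkmark(bitsPointer) == bitsPointerMarked)    // true
	fmt.Println(checkmark(checkmark(bitsScalar)) == bitsScalar) // XOR is its own inverse
}
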
++ ++ if step == 0 { ++ // updating top and bottom nibbles, all boundaries ++ for i := int32(0); i < n/2; i, bitp = i+1, addb(bitp, uintptrMask&-1) { ++ if *bitp&bitBoundary == 0 { ++ gothrow("missing bitBoundary") ++ } ++ b := (*bitp & bitPtrMask) >> 2 ++ if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) { ++ *bitp &^= 0x0c // convert to _BitsDead ++ } else if b == _BitsScalarMarked || b == _BitsPointerMarked { ++ *bitp &^= _BitsCheckMarkXor << 2 ++ } ++ ++ if (*bitp>>gcBits)&bitBoundary == 0 { ++ gothrow("missing bitBoundary") ++ } ++ b = ((*bitp >> gcBits) & bitPtrMask) >> 2 ++ if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) { ++ *bitp &^= 0xc0 // convert to _BitsDead ++ } else if b == _BitsScalarMarked || b == _BitsPointerMarked { ++ *bitp &^= _BitsCheckMarkXor << (2 + gcBits) ++ } ++ } ++ } else { ++ // updating bottom nibble for first word of each object ++ for i := int32(0); i < n; i, bitp = i+1, addb(bitp, -step) { ++ if *bitp&bitBoundary == 0 { ++ gothrow("missing bitBoundary") ++ } ++ b := (*bitp & bitPtrMask) >> 2 ++ ++ if checkmark && b == _BitsDead { ++ // move BitsDead into second word. ++ // set bits to BitsScalar in preparation for checkmark phase. ++ *bitp &^= 0xc0 ++ *bitp |= _BitsScalar << 2 ++ } else if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) && *bitp&0xc0 == 0 { ++ // Cleaning up after checkmark phase. ++ // First word is scalar or dead (we forgot) ++ // and second word is dead. ++ // First word might as well be dead too. ++ *bitp &^= 0x0c ++ } else if b == _BitsScalarMarked || b == _BitsPointerMarked { ++ *bitp ^= _BitsCheckMarkXor << 2 + } + } + } ++} + - casgstatus(gp, _Gwaiting, _Grunning) ++// clearcheckmarkbits preforms two tasks. ++// 1. When used before the checkmark phase it converts BitsDead (00) to bitsScalar (01) ++// for nibbles with the BoundaryBit set. ++// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer 10 and ++// BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding. ++// This is a bit expensive but preserves the BitsDead encoding during the normal marking. ++// BitsDead remains valid for every nibble except the ones with BitsBoundary set. ++func clearcheckmarkbits() { ++ for _, s := range work.spans { ++ if s.state == _MSpanInUse { ++ clearcheckmarkbitsspan(s) ++ } ++ } ++} ++ ++// Called from malloc.go using onM. ++// The world is stopped. Rerun the scan and mark phases ++// using the bitMarkedCheck bit instead of the ++// bitMarked bit. If the marking encounters an ++// bitMarked bit that is not set then we throw. ++func gccheckmark_m(startTime int64, eagersweep bool) { ++ if !gccheckmarkenable { ++ return ++ } ++ ++ if checkmark { ++ gothrow("gccheckmark_m, entered with checkmark already true") ++ } ++ ++ checkmark = true ++ clearcheckmarkbits() // Converts BitsDead to BitsScalar. ++ gc_m(startTime, eagersweep) // turns off checkmark ++ // Work done, fixed up the GC bitmap to remove the checkmark bits. ++ clearcheckmarkbits() ++} ++ ++func gccheckmarkenable_m() { ++ gccheckmarkenable = true ++} ++ ++func gccheckmarkdisable_m() { ++ gccheckmarkenable = false ++} ++ ++func finishsweep_m() { ++ // The world is stopped so we should be able to complete the sweeps ++ // quickly. ++ for sweepone() != ^uintptr(0) { ++ sweep.npausesweep++ ++ } ++ ++ // There may be some other spans being swept concurrently that ++ // we need to wait for. If finishsweep_m is done with the world stopped ++ // this code is not required. 
++ sg := mheap_.sweepgen ++ for _, s := range work.spans { ++ if s.sweepgen != sg && s.state == _MSpanInUse { ++ mSpan_EnsureSwept(s) ++ } ++ } ++} ++ ++// Scan all of the stacks, greying (or graying if in America) the referents ++// but not blackening them since the mark write barrier isn't installed. ++func gcscan_m() { ++ _g_ := getg() ++ ++ // Grab the g that called us and potentially allow rescheduling. ++ // This allows it to be scanned like other goroutines. ++ mastergp := _g_.m.curg ++ casgstatus(mastergp, _Grunning, _Gwaiting) ++ mastergp.waitreason = "garbage collection scan" ++ ++ // Span sweeping has been done by finishsweep_m. ++ // Long term we will want to make this goroutine runnable ++ // by placing it onto a scanenqueue state and then calling ++ // runtime·restartg(mastergp) to make it Grunnable. ++ // At the bottom we will want to return this p back to the scheduler. ++ oldphase := gcphase ++ ++ // Prepare flag indicating that the scan has not been completed. ++ lock(&allglock) ++ local_allglen := allglen ++ for i := uintptr(0); i < local_allglen; i++ { ++ gp := allgs[i] ++ gp.gcworkdone = false // set to true in gcphasework ++ } ++ unlock(&allglock) ++ ++ work.nwait = 0 ++ work.ndone = 0 ++ work.nproc = 1 // For now do not do this in parallel. ++ gcphase = _GCscan ++ // ackgcphase is not needed since we are not scanning running goroutines. ++ parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), nil, false, markroot) ++ parfordo(work.markfor) ++ ++ lock(&allglock) ++ // Check that gc work is done. ++ for i := uintptr(0); i < local_allglen; i++ { ++ gp := allgs[i] ++ if !gp.gcworkdone { ++ gothrow("scan missed a g") ++ } ++ } ++ unlock(&allglock) ++ ++ gcphase = oldphase ++ casgstatus(mastergp, _Gwaiting, _Grunning) ++ // Let the g that called us continue to run. ++} ++ ++// Mark all objects that are known about. ++func gcmark_m() { ++ scanblock(0, 0, nil) ++} ++ ++// For now this must be bracketed with a stoptheworld and a starttheworld to ensure ++// all go routines see the new barrier. ++func gcinstallmarkwb_m() { ++ gcphase = _GCmark ++} ++ ++// For now this must be bracketed with a stoptheworld and a starttheworld to ensure ++// all go routines see the new barrier. ++func gcinstalloffwb_m() { ++ gcphase = _GCoff + } + + func gc(start_time int64, eagersweep bool) { + if _DebugGCPtrs { + print("GC start\n") + } + + if debug.allocfreetrace > 0 { + tracegc() + } + + _g_ := getg() + _g_.m.traceback = 2 + t0 := start_time + work.tstart = start_time + + var t1 int64 + if debug.gctrace > 0 { + t1 = nanotime() + } + - // Sweep what is not sweeped by bgsweep. - for sweepone() != ^uintptr(0) { - sweep.npausesweep++ ++ if !checkmark { ++ finishsweep_m() // skip during checkmark debug phase. + } + + // Cache runtime.mheap_.allspans in work.spans to avoid conflicts with + // resizing/freeing allspans. + // New spans can be created while GC progresses, but they are not garbage for + // this round: + // - new stack spans can be created even while the world is stopped. + // - new malloc spans can be created during the concurrent sweep + + // Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap. + lock(&mheap_.lock) + // Free the old cached sweep array if necessary. + if work.spans != nil && &work.spans[0] != &h_allspans[0] { + sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys) + } + // Cache the current array for marking. 
+ mheap_.gcspans = mheap_.allspans + work.spans = h_allspans + unlock(&mheap_.lock) ++ oldphase := gcphase + + work.nwait = 0 + work.ndone = 0 + work.nproc = uint32(gcprocs()) ++ gcphase = _GCmarktermination ++ ++ // World is stopped so allglen will not change. ++ for i := uintptr(0); i < allglen; i++ { ++ gp := allgs[i] ++ gp.gcworkdone = false // set to true in gcphasework ++ } ++ + parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot) + if work.nproc > 1 { + noteclear(&work.alldone) + helpgc(int32(work.nproc)) + } + + var t2 int64 + if debug.gctrace > 0 { + t2 = nanotime() + } + + gchelperstart() + parfordo(work.markfor) + scanblock(0, 0, nil) + ++ if work.full != 0 { ++ gothrow("work.full != 0") ++ } ++ if work.partial != 0 { ++ gothrow("work.partial != 0") ++ } ++ ++ gcphase = oldphase + var t3 int64 + if debug.gctrace > 0 { + t3 = nanotime() + } + + if work.nproc > 1 { + notesleep(&work.alldone) + } + + shrinkfinish() + + cachestats() + // next_gc calculation is tricky with concurrent sweep since we don't know size of live heap + // estimate what was live heap size after previous GC (for printing only) + heap0 := memstats.next_gc * 100 / (uint64(gcpercent) + 100) + // conservatively set next_gc to high value assuming that everything is live + // concurrent/lazy sweep will reduce this number while discovering new garbage + memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100 + + t4 := nanotime() + atomicstore64(&memstats.last_gc, uint64(unixnanotime())) // must be Unix time to make sense to user + memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(t4 - t0) + memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(t4) + memstats.pause_total_ns += uint64(t4 - t0) + memstats.numgc++ + if memstats.debuggc { + print("pause ", t4-t0, "\n") + } + + if debug.gctrace > 0 { + heap1 := memstats.heap_alloc + var stats gcstats + updatememstats(&stats) + if heap1 != memstats.heap_alloc { + print("runtime: mstats skew: heap=", heap1, "/", memstats.heap_alloc, "\n") + gothrow("mstats skew") + } + obj := memstats.nmalloc - memstats.nfree + + stats.nprocyield += work.markfor.nprocyield + stats.nosyield += work.markfor.nosyield + stats.nsleep += work.markfor.nsleep + + print("gc", memstats.numgc, "(", work.nproc, "): ", + (t1-t0)/1000, "+", (t2-t1)/1000, "+", (t3-t2)/1000, "+", (t4-t3)/1000, " us, ", + heap0>>20, " -> ", heap1>>20, " MB, ", + obj, " (", memstats.nmalloc, "-", memstats.nfree, ") objects, ", + gcount(), " goroutines, ", + len(work.spans), "/", sweep.nbgsweep, "/", sweep.npausesweep, " sweeps, ", + stats.nhandoff, "(", stats.nhandoffcnt, ") handoff, ", + work.markfor.nsteal, "(", work.markfor.nstealcnt, ") steal, ", + stats.nprocyield, "/", stats.nosyield, "/", stats.nsleep, " yields\n") + sweep.nbgsweep = 0 + sweep.npausesweep = 0 + } + + // See the comment in the beginning of this function as to why we need the following. + // Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap. + lock(&mheap_.lock) + // Free the old cached mark array if necessary. 
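
The next_gc computation in gc() above is the plain GOGC rule, and the heap0 value on the gctrace line simply inverts it to estimate the live heap after the previous cycle. For reference (gcpercent comes from GOGC; the numbers are only an example):

package main

import "fmt"

// nextGC mirrors memstats.next_gc = heap_alloc + heap_alloc*gcpercent/100.
func nextGC(heapAlloc, gcpercent uint64) uint64 {
	return heapAlloc + heapAlloc*gcpercent/100
}

// prevLive mirrors heap0 = next_gc*100/(gcpercent+100), the estimate of
// the live heap after the previous cycle that gctrace prints.
func prevLive(target, gcpercent uint64) uint64 {
	return target * 100 / (gcpercent + 100)
}

func main() {
	ng := nextGC(4<<20, 100)           // with GOGC=100, a 4 MB heap targets 8 MB
	fmt.Println(ng, prevLive(ng, 100)) // 8388608 4194304
}
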
+ if work.spans != nil && &work.spans[0] != &h_allspans[0] { + sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys) + } + ++ if gccheckmarkenable { ++ if !checkmark { ++ // first half of two-pass; don't set up sweep ++ unlock(&mheap_.lock) ++ return ++ } ++ checkmark = false // done checking marks ++ } ++ + // Cache the current array for sweeping. + mheap_.gcspans = mheap_.allspans + mheap_.sweepgen += 2 + mheap_.sweepdone = 0 + work.spans = h_allspans + sweep.spanidx = 0 + unlock(&mheap_.lock) + + if _ConcurrentSweep && !eagersweep { + lock(&gclock) + if !sweep.started { + go bgsweep() + sweep.started = true + } else if sweep.parked { + sweep.parked = false + ready(sweep.g) + } + unlock(&gclock) + } else { + // Sweep all spans eagerly. + for sweepone() != ^uintptr(0) { + sweep.npausesweep++ + } + // Do an additional mProf_GC, because all 'free' events are now real as well. + mProf_GC() + } + + mProf_GC() + _g_.m.traceback = 0 + + if _DebugGCPtrs { + print("GC end\n") + } + } + + func readmemstats_m(stats *MemStats) { + updatememstats(nil) + + // Size of the trailing by_size array differs between Go and C, + // NumSizeClasses was changed, but we can not change Go struct because of backward compatibility. + memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats) + + // Stack numbers are part of the heap numbers, separate those out for user consumption + stats.StackSys = stats.StackInuse + stats.HeapInuse -= stats.StackInuse + stats.HeapSys -= stats.StackInuse + } + + //go:linkname readGCStats runtime/debug.readGCStats + func readGCStats(pauses *[]uint64) { + systemstack(func() { + readGCStats_m(pauses) + }) + } + + func readGCStats_m(pauses *[]uint64) { + p := *pauses + // Calling code in runtime/debug should make the slice large enough. + if cap(p) < len(memstats.pause_ns)+3 { + gothrow("runtime: short slice passed to readGCStats") + } + + // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns. + lock(&mheap_.lock) + + n := memstats.numgc + if n > uint32(len(memstats.pause_ns)) { + n = uint32(len(memstats.pause_ns)) + } + + // The pause buffer is circular. The most recent pause is at + // pause_ns[(numgc-1)%len(pause_ns)], and then backward + // from there to go back farther in time. We deliver the times + // most recent first (in p[0]). + p = p[:cap(p)] + for i := uint32(0); i < n; i++ { + j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns)) + p[i] = memstats.pause_ns[j] + p[n+i] = memstats.pause_end[j] + } + + p[n+n] = memstats.last_gc + p[n+n+1] = uint64(memstats.numgc) + p[n+n+2] = memstats.pause_total_ns + unlock(&mheap_.lock) + *pauses = p[:n+n+3] + } + + func setGCPercent(in int32) (out int32) { + lock(&mheap_.lock) + out = gcpercent + if in < 0 { + in = -1 + } + gcpercent = in + unlock(&mheap_.lock) + return out + } + + func gchelperstart() { + _g_ := getg() + + if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc { + gothrow("gchelperstart: bad m->helpgc") + } + if _g_ != _g_.m.g0 { + gothrow("gchelper not running on g0 stack") + } + } + + func wakefing() *g { + var res *g + lock(&finlock) + if fingwait && fingwake { + fingwait = false + fingwake = false + res = fing + } + unlock(&finlock) + return res + } + + func addb(p *byte, n uintptr) *byte { + return (*byte)(add(unsafe.Pointer(p), n)) + } + + // Recursively unrolls GC program in prog. + // mask is where to store the result. + // ppos is a pointer to position in mask, in bits. 
+ // sparse says to generate 4-bits per word mask for heap (2-bits for data/bss otherwise). + func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte { + arena_start := mheap_.arena_start + pos := *ppos + mask := (*[1 << 30]byte)(unsafe.Pointer(maskp)) + for { + switch *prog { + default: + gothrow("unrollgcprog: unknown instruction") + + case insData: + prog = addb(prog, 1) + siz := int(*prog) + prog = addb(prog, 1) + p := (*[1 << 30]byte)(unsafe.Pointer(prog)) + for i := 0; i < siz; i++ { + v := p[i/_PointersPerByte] + v >>= (uint(i) % _PointersPerByte) * _BitsPerPointer + v &= _BitsMask + if inplace { + // Store directly into GC bitmap. + off := (uintptr(unsafe.Pointer(&mask[pos])) - arena_start) / ptrSize + bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)) + shift := (off % wordsPerBitmapByte) * gcBits + if shift == 0 { + *bitp = 0 + } + *bitp |= v << (shift + 2) + pos += ptrSize + } else if sparse { + // 4-bits per word + v <<= (pos % 8) + 2 + mask[pos/8] |= v + pos += gcBits + } else { + // 2-bits per word + v <<= pos % 8 + mask[pos/8] |= v + pos += _BitsPerPointer + } + } + prog = addb(prog, round(uintptr(siz)*_BitsPerPointer, 8)/8) + + case insArray: + prog = (*byte)(add(unsafe.Pointer(prog), 1)) + siz := uintptr(0) + for i := uintptr(0); i < ptrSize; i++ { + siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1))) + } + prog = (*byte)(add(unsafe.Pointer(prog), ptrSize)) + var prog1 *byte + for i := uintptr(0); i < siz; i++ { + prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse) + } + if *prog1 != insArrayEnd { + gothrow("unrollgcprog: array does not end with insArrayEnd") + } + prog = (*byte)(add(unsafe.Pointer(prog1), 1)) + + case insArrayEnd, insEnd: + *ppos = pos + return prog + } + } + } + + // Unrolls GC program prog for data/bss, returns dense GC mask. + func unrollglobgcprog(prog *byte, size uintptr) bitvector { + masksize := round(round(size, ptrSize)/ptrSize*bitsPerPointer, 8) / 8 + mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys)) + mask[masksize] = 0xa1 + pos := uintptr(0) + prog = unrollgcprog1(&mask[0], prog, &pos, false, false) + if pos != size/ptrSize*bitsPerPointer { + print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*bitsPerPointer, "\n") + gothrow("unrollglobgcprog: bad program size") + } + if *prog != insEnd { + gothrow("unrollglobgcprog: program does not end with insEnd") + } + if mask[masksize] != 0xa1 { + gothrow("unrollglobgcprog: overflow") + } + return bitvector{int32(masksize * 8), &mask[0]} + } + + func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) { + pos := uintptr(0) + prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) + for pos != size0 { + unrollgcprog1((*byte)(v), prog, &pos, true, true) + } + + // Mark first word as bitAllocated. + arena_start := mheap_.arena_start + off := (uintptr(v) - arena_start) / ptrSize + bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)) + shift := (off % wordsPerBitmapByte) * gcBits + *bitp |= bitBoundary << shift + + // Mark word after last as BitsDead. 
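
unrollgcprog1 above is an interpreter for the compact GC "program" encoding: insData emits a run of 2-bit type values, insArray repeats a block a given number of times. A much-simplified standalone version (one byte per word in the output, one-byte array lengths, no in-place or sparse modes) to show the control flow:

package main

import "fmt"

// Simplified instruction set; the real encoding packs the mask 2 (or 4)
// bits per word and uses pointer-sized array lengths.
const (
	insData     = 1
	insArray    = 2
	insArrayEnd = 3
	insEnd      = 4
)

// unroll appends the per-word type values described by prog[pc:] to mask
// and returns the new position, stopping at insArrayEnd or insEnd.
// Array counts are assumed to be at least 1.
func unroll(mask, prog []byte, pc int) ([]byte, int) {
	for {
		switch prog[pc] {
		case insData:
			n := int(prog[pc+1])
			mask = append(mask, prog[pc+2:pc+2+n]...)
			pc += 2 + n
		case insArray:
			count := int(prog[pc+1])
			start := pc + 2
			end := start
			for i := 0; i < count; i++ {
				mask, end = unroll(mask, prog, start)
			}
			if prog[end] != insArrayEnd {
				panic("array does not end with insArrayEnd")
			}
			pc = end + 1
		case insArrayEnd, insEnd:
			return mask, pc
		default:
			panic("unknown instruction")
		}
	}
}

func main() {
	// Roughly: struct { ptr; [3]struct{ scalar; ptr } }
	prog := []byte{
		insData, 1, 2, // one BitsPointer word
		insArray, 3, insData, 2, 1, 2, insArrayEnd, // 3 x (BitsScalar, BitsPointer)
		insEnd,
	}
	mask, _ := unroll(nil, prog, 0)
	fmt.Println(mask) // [2 1 2 1 2 1 2]
}
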
+ if size0 < size { + off := (uintptr(v) + size0 - arena_start) / ptrSize + bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)) + shift := (off % wordsPerBitmapByte) * gcBits + *bitp &= uint8(^(bitPtrMask << shift) | uintptr(bitsDead)<<(shift+2)) + } + } + + var unroll mutex + + // Unrolls GC program in typ.gc[1] into typ.gc[0] + func unrollgcprog_m(typ *_type) { + lock(&unroll) + mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0]))) + if *mask == 0 { + pos := uintptr(8) // skip the unroll flag + prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) + prog = unrollgcprog1(mask, prog, &pos, false, true) + if *prog != insEnd { + gothrow("unrollgcprog: program does not end with insEnd") + } + if typ.size/ptrSize%2 != 0 { + // repeat the program + prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) + unrollgcprog1(mask, prog, &pos, false, true) + } + + // atomic way to say mask[0] = 1 + atomicor8(mask, 1) + } + unlock(&unroll) + } + + // mark the span of memory at v as having n blocks of the given size. + // if leftover is true, there is left over space at the end of the span. + func markspan(v unsafe.Pointer, size uintptr, n uintptr, leftover bool) { + if uintptr(v)+size*n > mheap_.arena_used || uintptr(v) < mheap_.arena_start { + gothrow("markspan: bad pointer") + } + + // Find bits of the beginning of the span. + off := (uintptr(v) - uintptr(mheap_.arena_start)) / ptrSize + if off%wordsPerBitmapByte != 0 { + gothrow("markspan: unaligned length") + } + b := mheap_.arena_start - off/wordsPerBitmapByte - 1 + + // Okay to use non-atomic ops here, because we control + // the entire span, and each bitmap byte has bits for only + // one span, so no other goroutines are changing these bitmap words. + + if size == ptrSize { + // Possible only on 64-bits (minimal size class is 8 bytes). + // Set memory to 0x11. + if (bitBoundary|bitsDead)< mheap_.arena_used || v < mheap_.arena_start { + gothrow("markspan: bad pointer") + } + + off := (v - mheap_.arena_start) / ptrSize // word offset + if off%(ptrSize*wordsPerBitmapByte) != 0 { + gothrow("markspan: unaligned pointer") + } + + b := mheap_.arena_start - off/wordsPerBitmapByte - 1 + n /= ptrSize + if n%(ptrSize*wordsPerBitmapByte) != 0 { + gothrow("unmarkspan: unaligned length") + } + + // Okay to use non-atomic ops here, because we control + // the entire span, and each bitmap word has bits for only + // one span, so no other goroutines are changing these + // bitmap words. + n /= wordsPerBitmapByte + memclr(unsafe.Pointer(b-n+1), n) + } + + func mHeap_MapBits(h *mheap) { + // Caller has added extra mappings to the arena. + // Add extra mappings of bitmap words as needed. + // We allocate extra bitmap pieces in chunks of bitmapChunk. + const bitmapChunk = 8192 + + n := (h.arena_used - h.arena_start) / (ptrSize * wordsPerBitmapByte) + n = round(n, bitmapChunk) + n = round(n, _PhysPageSize) + if h.bitmap_mapped >= n { + return + } + + sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) + h.bitmap_mapped = n + } + + func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool { + target := (*stkframe)(ctxt) + if frame.sp <= target.sp && target.sp < frame.varp { + *target = *frame + return false + } + return true + } + + // Returns GC type info for object p for testing. 
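
markspan, unmarkspan and the sweep code all share the same bitmap addressing: 4 bits (gcBits) per heap word, two words per bitmap byte, with the bitmap growing downward from arena_start. A small sketch of just that arithmetic (arenaStart and the addresses are made-up values):

package main

import "fmt"

const (
	ptrSize            = 8
	gcBits             = 4
	wordsPerBitmapByte = 8 / gcBits // 2
)

// bitmapAddr returns the bitmap byte address and in-byte shift that
// describe the heap word at address p, following the off/bitp/shift
// computation used above.
func bitmapAddr(arenaStart, p uintptr) (bitp uintptr, shift uint) {
	off := (p - arenaStart) / ptrSize // word index into the arena
	bitp = arenaStart - off/wordsPerBitmapByte - 1
	shift = uint(off%wordsPerBitmapByte) * gcBits
	return
}

func main() {
	const arenaStart = 0x1000000
	for _, p := range []uintptr{arenaStart, arenaStart + 8, arenaStart + 16} {
		bitp, shift := bitmapAddr(arenaStart, p)
		fmt.Printf("word %#x -> byte %#x shift %d\n", p, bitp, shift)
	}
	// word 0x1000000 -> byte 0xffffff shift 0
	// word 0x1000008 -> byte 0xffffff shift 4
	// word 0x1000010 -> byte 0xfffffe shift 0
}
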
+ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { + *mask = nil + *len = 0 + + // data + if uintptr(unsafe.Pointer(&data)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&edata)) { + n := (*ptrtype)(unsafe.Pointer(t)).elem.size + *len = n / ptrSize + *mask = &make([]byte, *len)[0] + for i := uintptr(0); i < n; i += ptrSize { + off := (uintptr(p) + i - uintptr(unsafe.Pointer(&data))) / ptrSize + bits := (*(*byte)(add(unsafe.Pointer(gcdatamask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask + *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits + } + return + } + + // bss + if uintptr(unsafe.Pointer(&bss)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&ebss)) { + n := (*ptrtype)(unsafe.Pointer(t)).elem.size + *len = n / ptrSize + *mask = &make([]byte, *len)[0] + for i := uintptr(0); i < n; i += ptrSize { + off := (uintptr(p) + i - uintptr(unsafe.Pointer(&bss))) / ptrSize + bits := (*(*byte)(add(unsafe.Pointer(gcbssmask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask + *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits + } + return + } + + // heap + var n uintptr + var base uintptr + if mlookup(uintptr(p), &base, &n, nil) != 0 { + *len = n / ptrSize + *mask = &make([]byte, *len)[0] + for i := uintptr(0); i < n; i += ptrSize { + off := (uintptr(base) + i - mheap_.arena_start) / ptrSize + b := mheap_.arena_start - off/wordsPerBitmapByte - 1 + shift := (off % wordsPerBitmapByte) * gcBits + bits := (*(*byte)(unsafe.Pointer(b)) >> (shift + 2)) & bitsMask + *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits + } + return + } + + // stack + var frame stkframe + frame.sp = uintptr(p) + _g_ := getg() + gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0) + if frame.fn != nil { + f := frame.fn + targetpc := frame.continpc + if targetpc == 0 { + return + } + if targetpc != f.entry { + targetpc-- + } + pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc) + if pcdata == -1 { + return + } + stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) + if stkmap == nil || stkmap.n <= 0 { + return + } + bv := stackmapdata(stkmap, pcdata) + size := uintptr(bv.n) / bitsPerPointer * ptrSize + n := (*ptrtype)(unsafe.Pointer(t)).elem.size + *len = n / ptrSize + *mask = &make([]byte, *len)[0] + for i := uintptr(0); i < n; i += ptrSize { + off := (uintptr(p) + i - frame.varp + size) / ptrSize + bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*bitsPerPointer/8))) >> ((off * bitsPerPointer) % 8)) & bitsMask + *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits + } + } + } + + func unixnanotime() int64 { + var now int64 + gc_unixnanotime(&now) + return now + } diff --cc src/runtime/mgc0.go index dc4eec5196,6d4ae61c11..00e64c0fff --- a/src/runtime/mgc0.go +++ b/src/runtime/mgc0.go @@@ -93,36 -86,6 +91,32 @@@ const //go:nosplit func writebarrierptr(dst *uintptr, src uintptr) { *dst = src + writebarrierptr_nostore(dst, src) +} + +// Like writebarrierptr, but the store has already been applied. +// Do not reapply. 
+//go:nosplit +func writebarrierptr_nostore(dst *uintptr, src uintptr) { + if getg() == nil { // very low-level startup + return + } + + if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) { - onM(func() { gothrow("bad pointer in write barrier") }) ++ systemstack(func() { gothrow("bad pointer in write barrier") }) + } + + mp := acquirem() + if mp.inwb || mp.dying > 0 { + releasem(mp) + return + } + mp.inwb = true - oldscalar0 := mp.scalararg[0] - oldscalar1 := mp.scalararg[1] - mp.scalararg[0] = uintptr(unsafe.Pointer(dst)) - mp.scalararg[1] = src - onM_signalok(gcmarkwb_m) - mp.scalararg[0] = oldscalar0 - mp.scalararg[1] = oldscalar1 ++ systemstack(func() { ++ gcmarkwb_m(dst, src) ++ }) + mp.inwb = false + releasem(mp) } //go:nosplit diff --cc src/runtime/mgc0.h index 519d7206e7,62726b4f0f..dd0c460246 --- a/src/runtime/mgc0.h +++ b/src/runtime/mgc0.h @@@ -2,81 -2,19 +2,21 @@@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - // Garbage collector (GC) + // Used by cmd/gc. enum { - // Four bits per word (see #defines below). gcBits = 4, - wordsPerBitmapByte = 8/gcBits, - - // GC type info programs. - // The programs allow to store type info required for GC in a compact form. - // Most importantly arrays take O(1) space instead of O(n). - // The program grammar is: - // - // Program = {Block} "insEnd" - // Block = Data | Array - // Data = "insData" DataSize DataBlock - // DataSize = int // size of the DataBlock in bit pairs, 1 byte - // DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes - // Array = "insArray" ArrayLen Block "insArrayEnd" - // ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch) - // - // Each instruction (insData, insArray, etc) is 1 byte. - // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; } - // the program looks as: - // - // insData 3 (BitsMultiWord BitsSlice BitsScalar) - // insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd - // - // Total size of the program is 17 bytes (13 bytes on 32-bits). - // The corresponding GC mask would take 43 bytes (it would be repeated - // because the type has odd number of words). + BitsPerPointer = 2, + BitsDead = 0, + BitsScalar = 1, + BitsPointer = 2, + BitsMask = 3, + PointersPerByte = 8/BitsPerPointer, - MaxGCMask = 64, insData = 1, insArray, insArrayEnd, insEnd, + - // Pointer map - BitsPerPointer = 2, - BitsMask = (1< + const ( + _CTL_HW = 6 + _HW_NCPU = 3 + ) + + var sigset_none = sigset{} + var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} + + func getncpu() int32 { + mib := [2]uint32{_CTL_HW, _HW_NCPU} + out := uint32(0) + nout := unsafe.Sizeof(out) + ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) + if ret >= 0 { + return int32(out) + } + return 1 + } + + // FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and + // thus the code is largely similar. See Linux implementation + // and lock_futex.c for comments. 
+ + //go:nosplit + func futexsleep(addr *uint32, val uint32, ns int64) { + systemstack(func() { + futexsleep1(addr, val, ns) + }) + } + + func futexsleep1(addr *uint32, val uint32, ns int64) { + var tsp *timespec + if ns >= 0 { + var ts timespec + ts.tv_nsec = 0 - ts.set_sec(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))) ++ ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) + tsp = &ts + } + ret := sys_umtx_op(addr, _UMTX_OP_WAIT_UINT_PRIVATE, val, nil, tsp) + if ret >= 0 || ret == -_EINTR { + return + } + print("umtx_wait addr=", addr, " val=", val, " ret=", ret, "\n") + *(*int32)(unsafe.Pointer(uintptr(0x1005))) = 0x1005 + } + + //go:nosplit + func futexwakeup(addr *uint32, cnt uint32) { + ret := sys_umtx_op(addr, _UMTX_OP_WAKE_PRIVATE, cnt, nil, nil) + if ret >= 0 { + return + } + + systemstack(func() { + print("umtx_wake_addr=", addr, " ret=", ret, "\n") + }) + } + + func thr_start() + + func newosproc(mp *m, stk unsafe.Pointer) { + if false { + print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " thr_start=", funcPC(thr_start), " id=", mp.id, "/", mp.tls[0], " ostk=", &mp, "\n") + } + + // NOTE(rsc): This code is confused. stackbase is the top of the stack + // and is equal to stk. However, it's working, so I'm not changing it. + param := thrparam{ + start_func: funcPC(thr_start), + arg: unsafe.Pointer(mp), + stack_base: mp.g0.stack.hi, + stack_size: uintptr(stk) - mp.g0.stack.hi, + child_tid: unsafe.Pointer(&mp.procid), + parent_tid: nil, + tls_base: unsafe.Pointer(&mp.tls[0]), + tls_size: unsafe.Sizeof(mp.tls), + } + mp.tls[0] = uintptr(mp.id) // so 386 asm can find it + + var oset sigset + sigprocmask(&sigset_all, &oset) + thr_new(¶m, int32(unsafe.Sizeof(param))) + sigprocmask(&oset, nil) + } + + func osinit() { + ncpu = getncpu() + } + + var urandom_data [_HashRandomBytes]byte + var urandom_dev = []byte("/dev/random\x00") + + //go:nosplit + func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) { + fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0) + if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes { + *rnd = unsafe.Pointer(&urandom_data[0]) + *rnd_len = _HashRandomBytes + } else { + *rnd = nil + *rnd_len = 0 + } + close(fd) + } + + func goenvs() { + goenvs_unix() + } + + // Called to initialize a new m (including the bootstrap m). + // Called on the parent thread (main thread in case of bootstrap), can allocate memory. + func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) + mp.gsignal.m = mp + } + + // Called to initialize a new m (including the bootstrap m). + // Called on the new thread, can not allocate memory. + func minit() { + _g_ := getg() + + // m.procid is a uint64, but thr_new writes a uint32 on 32-bit systems. + // Fix it up. (Only matters on big-endian, but be clean anyway.) + if ptrSize == 4 { + _g_.m.procid = uint64(*(*uint32)(unsafe.Pointer(&_g_.m.procid))) + } + + // Initialize signal handling. + signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) + sigprocmask(&sigset_none, nil) + } + + // Called from dropm to undo the effect of an minit. + func unminit() { + signalstack(nil, 0) + } + + func memlimit() uintptr { + /* + TODO: Convert to Go when something actually uses the result. + Rlimit rl; + extern byte runtime·text[], runtime·end[]; + uintptr used; + + if(runtime·getrlimit(RLIMIT_AS, &rl) != 0) + return 0; + if(rl.rlim_cur >= 0x7fffffff) + return 0; + + // Estimate our VM footprint excluding the heap. 
+ // Not an exact science: use size of binary plus + // some room for thread stacks. + used = runtime·end - runtime·text + (64<<20); + if(used >= rl.rlim_cur) + return 0; + + // If there's not at least 16 MB left, we're probably + // not going to be able to do much. Treat as no limit. + rl.rlim_cur -= used; + if(rl.rlim_cur < (16<<20)) + return 0; + + return rl.rlim_cur - used; + */ + + return 0 + } + + func sigtramp() + + type sigactiont struct { + sa_handler uintptr + sa_flags int32 + sa_mask sigset + } + + func setsig(i int32, fn uintptr, restart bool) { + var sa sigactiont + sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK + if restart { + sa.sa_flags |= _SA_RESTART + } + sa.sa_mask = sigset_all + if fn == funcPC(sighandler) { + fn = funcPC(sigtramp) + } + sa.sa_handler = fn + sigaction(i, &sa, nil) + } + func getsig(i int32) uintptr { + var sa sigactiont + sigaction(i, nil, &sa) + if sa.sa_handler == funcPC(sigtramp) { + return funcPC(sighandler) + } + return sa.sa_handler + } + + func signalstack(p *byte, n int32) { + var st stackt + st.ss_sp = uintptr(unsafe.Pointer(p)) + st.ss_size = uintptr(n) + st.ss_flags = 0 + if p == nil { + st.ss_flags = _SS_DISABLE + } + sigaltstack(&st, nil) + } + + func unblocksignals() { + sigprocmask(&sigset_none, nil) + } diff --cc src/runtime/os1_linux.go index 0000000000,0d24c5edc9..67fa6391e1 mode 000000,100644..100644 --- a/src/runtime/os1_linux.go +++ b/src/runtime/os1_linux.go @@@ -1,0 -1,287 +1,287 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + var sigset_none sigset + var sigset_all sigset = sigset{^uint32(0), ^uint32(0)} + + // Linux futex. + // + // futexsleep(uint32 *addr, uint32 val) + // futexwakeup(uint32 *addr) + // + // Futexsleep atomically checks if *addr == val and if so, sleeps on addr. + // Futexwakeup wakes up threads sleeping on addr. + // Futexsleep is allowed to wake up spuriously. + + const ( + _FUTEX_WAIT = 0 + _FUTEX_WAKE = 1 + ) + + // Atomically, + // if(*addr == val) sleep + // Might be woken up spuriously; that's allowed. + // Don't sleep longer than ns; ns < 0 means forever. + //go:nosplit + func futexsleep(addr *uint32, val uint32, ns int64) { + var ts timespec + + // Some Linux kernels have a bug where futex of + // FUTEX_WAIT returns an internal error code + // as an errno. Libpthread ignores the return value + // here, and so can we: as it says a few lines up, + // spurious wakeups are allowed. + if ns < 0 { + futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0) + return + } + + // It's difficult to live within the no-split stack limits here. + // On ARM and 386, a 64-bit divide invokes a general software routine + // that needs more stack than we can afford. So we use timediv instead. + // But on real 64-bit systems, where words are larger but the stack limit + // is not, even timediv is too heavy, and we really need to use just an + // ordinary machine instruction. + if ptrSize == 8 { - ts.set_sec(int32(ns / 1000000000)) ++ ts.set_sec(ns / 1000000000) + ts.set_nsec(int32(ns % 1000000000)) + } else { + ts.tv_nsec = 0 - ts.set_sec(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))) ++ ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) + } + futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0) + } + + // If any procs are sleeping on addr, wake up at most cnt. 
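An aside on the futexsleep code above, not part of the patch: what a timediv-style helper buys on 386/ARM. Dividing an int64 by 10^9 with "/" calls the software 64-bit divide routine, which needs more stack than a nosplit function can afford, while shift-and-subtract division needs only compares, shifts, and subtractions. The sketch below is not the runtime's timediv, just the same idea with invented names, assuming the quotient fits in 31 bits as it does for sleep timeouts.

	package main

	import "fmt"

	func div64by32(v int64, div int32, rem *int32) int32 {
		res := int32(0)
		for bit := 30; bit >= 0; bit-- {
			if v >= int64(div)<<uint(bit) {
				v -= int64(div) << uint(bit)
				res |= 1 << uint(bit)
			}
		}
		if rem != nil {
			*rem = int32(v)
		}
		return res
	}

	func main() {
		ns := int64(3750000123) // 3.750000123 seconds
		var nsec int32
		sec := div64by32(ns, 1000000000, &nsec)
		fmt.Println(sec, nsec)                    // 3 750000123
		fmt.Println(ns/1000000000, ns%1000000000) // same result via the 64-bit divide
	}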
+ //go:nosplit + func futexwakeup(addr *uint32, cnt uint32) { + ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0) + if ret >= 0 { + return + } + + // I don't know that futex wakeup can return + // EAGAIN or EINTR, but if it does, it would be + // safe to loop and call futex again. + systemstack(func() { + print("futexwakeup addr=", addr, " returned ", ret, "\n") + }) + + *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006 + } + + func getproccount() int32 { + var buf [16]uintptr + r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0]) + n := int32(0) + for _, v := range buf[:r/ptrSize] { + for i := 0; i < 64; i++ { + n += int32(v & 1) + v >>= 1 + } + } + if n == 0 { + n = 1 + } + return n + } + + // Clone, the Linux rfork. + const ( + _CLONE_VM = 0x100 + _CLONE_FS = 0x200 + _CLONE_FILES = 0x400 + _CLONE_SIGHAND = 0x800 + _CLONE_PTRACE = 0x2000 + _CLONE_VFORK = 0x4000 + _CLONE_PARENT = 0x8000 + _CLONE_THREAD = 0x10000 + _CLONE_NEWNS = 0x20000 + _CLONE_SYSVSEM = 0x40000 + _CLONE_SETTLS = 0x80000 + _CLONE_PARENT_SETTID = 0x100000 + _CLONE_CHILD_CLEARTID = 0x200000 + _CLONE_UNTRACED = 0x800000 + _CLONE_CHILD_SETTID = 0x1000000 + _CLONE_STOPPED = 0x2000000 + _CLONE_NEWUTS = 0x4000000 + _CLONE_NEWIPC = 0x8000000 + ) + + func newosproc(mp *m, stk unsafe.Pointer) { + /* + * note: strace gets confused if we use CLONE_PTRACE here. + */ + var flags int32 = _CLONE_VM | /* share memory */ + _CLONE_FS | /* share cwd, etc */ + _CLONE_FILES | /* share fd table */ + _CLONE_SIGHAND | /* share sig handler table */ + _CLONE_THREAD /* revisit - okay for now */ + + mp.tls[0] = uintptr(mp.id) // so 386 asm can find it + if false { + print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", funcPC(clone), " id=", mp.id, "/", mp.tls[0], " ostk=", &mp, "\n") + } + + // Disable signals during clone, so that the new thread starts + // with signals disabled. It will enable them in minit. + var oset sigset + rtsigprocmask(_SIG_SETMASK, &sigset_all, &oset, int32(unsafe.Sizeof(oset))) + ret := clone(flags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(funcPC(mstart))) + rtsigprocmask(_SIG_SETMASK, &oset, nil, int32(unsafe.Sizeof(oset))) + + if ret < 0 { + print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n") + gothrow("newosproc") + } + } + + func osinit() { + ncpu = getproccount() + } + + // Random bytes initialized at startup. These come + // from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c). + // byte* runtime·startup_random_data; + // uint32 runtime·startup_random_data_len; + + var urandom_data [_HashRandomBytes]byte + var urandom_dev = []byte("/dev/random\x00") + + //go:nosplit + func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) { + if startup_random_data != nil { + *rnd = unsafe.Pointer(startup_random_data) + *rnd_len = int32(startup_random_data_len) + return + } + fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0) + if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes { + *rnd = unsafe.Pointer(&urandom_data[0]) + *rnd_len = _HashRandomBytes + } else { + *rnd = nil + *rnd_len = 0 + } + close(fd) + } + + func goenvs() { + goenvs_unix() + } + + // Called to initialize a new m (including the bootstrap m). + // Called on the parent thread (main thread in case of bootstrap), can allocate memory. + func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) // Linux wants >= 2K + mp.gsignal.m = mp + } + + // Called to initialize a new m (including the bootstrap m). 
+ // Called on the new thread, can not allocate memory. + func minit() { + // Initialize signal handling. + _g_ := getg() + signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) + rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none))) + } + + // Called from dropm to undo the effect of an minit. + func unminit() { + signalstack(nil, 0) + } + + func memlimit() uintptr { + /* + TODO: Convert to Go when something actually uses the result. + + Rlimit rl; + extern byte runtime·text[], runtime·end[]; + uintptr used; + + if(runtime·getrlimit(RLIMIT_AS, &rl) != 0) + return 0; + if(rl.rlim_cur >= 0x7fffffff) + return 0; + + // Estimate our VM footprint excluding the heap. + // Not an exact science: use size of binary plus + // some room for thread stacks. + used = runtime·end - runtime·text + (64<<20); + if(used >= rl.rlim_cur) + return 0; + + // If there's not at least 16 MB left, we're probably + // not going to be able to do much. Treat as no limit. + rl.rlim_cur -= used; + if(rl.rlim_cur < (16<<20)) + return 0; + + return rl.rlim_cur - used; + */ + + return 0 + } + + //#ifdef GOARCH_386 + //#define sa_handler k_sa_handler + //#endif + + func sigreturn() + func sigtramp() + + func setsig(i int32, fn uintptr, restart bool) { + var sa sigactiont + memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa)) + sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER + if restart { + sa.sa_flags |= _SA_RESTART + } + sa.sa_mask = ^uint64(0) + // Although Linux manpage says "sa_restorer element is obsolete and + // should not be used". x86_64 kernel requires it. Only use it on + // x86. + if GOARCH == "386" || GOARCH == "amd64" { + sa.sa_restorer = funcPC(sigreturn) + } + if fn == funcPC(sighandler) { + fn = funcPC(sigtramp) + } + sa.sa_handler = fn + if rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask)) != 0 { + gothrow("rt_sigaction failure") + } + } + + func getsig(i int32) uintptr { + var sa sigactiont + + memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa)) + if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 { + gothrow("rt_sigaction read failure") + } + if sa.sa_handler == funcPC(sigtramp) { + return funcPC(sighandler) + } + return sa.sa_handler + } + + func signalstack(p *byte, n int32) { + var st sigaltstackt + st.ss_sp = p + st.ss_size = uintptr(n) + st.ss_flags = 0 + if p == nil { + st.ss_flags = _SS_DISABLE + } + sigaltstack(&st, nil) + } + + func unblocksignals() { + rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none))) + } diff --cc src/runtime/os1_openbsd.go index 0000000000,5c6ea74121..d5ffe10a81 mode 000000,100644..100644 --- a/src/runtime/os1_openbsd.go +++ b/src/runtime/os1_openbsd.go @@@ -1,0 -1,235 +1,235 @@@ + // Copyright 2011 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + const ( + ESRCH = 3 + EAGAIN = 35 + EWOULDBLOCK = EAGAIN + ENOTSUP = 91 + + // From OpenBSD's sys/time.h + CLOCK_REALTIME = 0 + CLOCK_VIRTUAL = 1 + CLOCK_PROF = 2 + CLOCK_MONOTONIC = 3 + ) + + var sigset_none = uint32(0) + var sigset_all = ^sigset_none + + // From OpenBSD's + const ( + CTL_HW = 6 + HW_NCPU = 3 + ) + + func getncpu() int32 { + mib := [2]uint32{CTL_HW, HW_NCPU} + out := uint32(0) + nout := unsafe.Sizeof(out) + + // Fetch hw.ncpu via sysctl. 
+ ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) + if ret >= 0 { + return int32(out) + } + return 1 + } + + //go:nosplit + func semacreate() uintptr { + return 1 + } + + //go:nosplit + func semasleep(ns int64) int32 { + _g_ := getg() + + // Compute sleep deadline. + var tsp *timespec + if ns >= 0 { + var ts timespec + var nsec int32 + ns += nanotime() - ts.set_sec(timediv(ns, 1000000000, &nsec)) ++ ts.set_sec(int64(timediv(ns, 1000000000, &nsec))) + ts.set_nsec(nsec) + tsp = &ts + } + + for { + // spin-mutex lock + for { + if xchg(&_g_.m.waitsemalock, 1) == 0 { + break + } + osyield() + } + + if _g_.m.waitsemacount != 0 { + // semaphore is available. + _g_.m.waitsemacount-- + // spin-mutex unlock + atomicstore(&_g_.m.waitsemalock, 0) + return 0 // semaphore acquired + } + + // sleep until semaphore != 0 or timeout. + // thrsleep unlocks m.waitsemalock. + ret := thrsleep((uintptr)(unsafe.Pointer(&_g_.m.waitsemacount)), CLOCK_MONOTONIC, tsp, (uintptr)(unsafe.Pointer(&_g_.m.waitsemalock)), (*int32)(unsafe.Pointer(&_g_.m.waitsemacount))) + if ret == EWOULDBLOCK { + return -1 + } + } + } + + //go:nosplit + func semawakeup(mp *m) { + // spin-mutex lock + for { + if xchg(&mp.waitsemalock, 1) == 0 { + break + } + osyield() + } + mp.waitsemacount++ + ret := thrwakeup(uintptr(unsafe.Pointer(&mp.waitsemacount)), 1) + if ret != 0 && ret != ESRCH { + // semawakeup can be called on signal stack. + systemstack(func() { + print("thrwakeup addr=", &mp.waitsemacount, " sem=", mp.waitsemacount, " ret=", ret, "\n") + }) + } + // spin-mutex unlock + atomicstore(&mp.waitsemalock, 0) + } + + func newosproc(mp *m, stk unsafe.Pointer) { + if false { + print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, "/", int32(mp.tls[0]), " ostk=", &mp, "\n") + } + + mp.tls[0] = uintptr(mp.id) // so 386 asm can find it + + param := tforkt{ + tf_tcb: unsafe.Pointer(&mp.tls[0]), + tf_tid: (*int32)(unsafe.Pointer(&mp.procid)), + tf_stack: uintptr(stk), + } + + oset := sigprocmask(_SIG_SETMASK, sigset_all) + ret := tfork(¶m, unsafe.Sizeof(param), mp, mp.g0, funcPC(mstart)) + sigprocmask(_SIG_SETMASK, oset) + + if ret < 0 { + print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n") + if ret == -ENOTSUP { + print("runtime: is kern.rthreads disabled?\n") + } + gothrow("runtime.newosproc") + } + } + + func osinit() { + ncpu = getncpu() + } + + var urandom_data [_HashRandomBytes]byte + var urandom_dev = []byte("/dev/urandom\x00") + + //go:nosplit + func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) { + fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0) + if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes { + *rnd = unsafe.Pointer(&urandom_data[0]) + *rnd_len = _HashRandomBytes + } else { + *rnd = nil + *rnd_len = 0 + } + close(fd) + } + + func goenvs() { + goenvs_unix() + } + + // Called to initialize a new m (including the bootstrap m). + // Called on the parent thread (main thread in case of bootstrap), can allocate memory. + func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) + mp.gsignal.m = mp + } + + // Called to initialize a new m (including the bootstrap m). + // Called on the new thread, can not allocate memory. + func minit() { + _g_ := getg() + + // m.procid is a uint64, but tfork writes an int32. Fix it up. 
+ _g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid))) + + // Initialize signal handling + signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) + sigprocmask(_SIG_SETMASK, sigset_none) + } + + // Called from dropm to undo the effect of an minit. + func unminit() { + signalstack(nil, 0) + } + + func memlimit() uintptr { + return 0 + } + + func sigtramp() + + type sigactiont struct { + sa_sigaction uintptr + sa_mask uint32 + sa_flags int32 + } + + func setsig(i int32, fn uintptr, restart bool) { + var sa sigactiont + sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK + if restart { + sa.sa_flags |= _SA_RESTART + } + sa.sa_mask = sigset_all + if fn == funcPC(sighandler) { + fn = funcPC(sigtramp) + } + sa.sa_sigaction = fn + sigaction(i, &sa, nil) + } + + func getsig(i int32) uintptr { + var sa sigactiont + sigaction(i, nil, &sa) + if sa.sa_sigaction == funcPC(sigtramp) { + return funcPC(sighandler) + } + return sa.sa_sigaction + } + + func signalstack(p *byte, n int32) { + var st stackt + + st.ss_sp = uintptr(unsafe.Pointer(p)) + st.ss_size = uintptr(n) + st.ss_flags = 0 + if p == nil { + st.ss_flags = _SS_DISABLE + } + sigaltstack(&st, nil) + } + + func unblocksignals() { + sigprocmask(_SIG_SETMASK, sigset_none) + } diff --cc src/runtime/os_linux_386.go index 0000000000,c4f95804ac..adcd5a1c4e mode 000000,100644..100644 --- a/src/runtime/os_linux_386.go +++ b/src/runtime/os_linux_386.go @@@ -1,0 -1,37 +1,36 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + const ( + _AT_NULL = 0 + _AT_RANDOM = 25 + _AT_SYSINFO = 32 + ) + + var _vdso uint32 + -//go:nosplit -func linux_setup_vdso(argc int32, argv **byte) { ++func sysargs(argc int32, argv **byte) { + // skip over argv, envv to get to auxv + n := argc + 1 + for argv_index(argv, n) != nil { + n++ + } + n++ + auxv := (*[1 << 28]uint32)(add(unsafe.Pointer(argv), uintptr(n)*ptrSize)) + + for i := 0; auxv[i] != _AT_NULL; i += 2 { + switch auxv[i] { + case _AT_SYSINFO: + _vdso = auxv[i+1] + + case _AT_RANDOM: + startup_random_data = (*byte)(unsafe.Pointer(uintptr(auxv[i+1]))) + startup_random_data_len = 16 + } + } + } diff --cc src/runtime/proc1.go index 0000000000,81b211d0d3..8c941dd35d mode 000000,100644..100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@@ -1,0 -1,3170 +1,3186 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + var ( + m0 m + g0 g + ) + + // Goroutine scheduler + // The scheduler's job is to distribute ready-to-run goroutines over worker threads. + // + // The main concepts are: + // G - goroutine. + // M - worker thread, or machine. + // P - processor, a resource that is required to execute Go code. + // M must have an associated P to execute Go code, however it can be + // blocked or in a syscall w/o an associated P. + // + // Design doc at http://golang.org/s/go11sched. + + const ( + // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. + // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 
+ _GoidCacheBatch = 16 + ) + + /* + SchedT sched; + int32 gomaxprocs; + uint32 needextram; + bool iscgo; + M m0; + G g0; // idle goroutine for m0 + G* lastg; + M* allm; + M* extram; + P* allp[MaxGomaxprocs+1]; + int8* goos; + int32 ncpu; + int32 newprocs; + + Mutex allglock; // the following vars are protected by this lock or by stoptheworld + G** allg; + Slice allgs; + uintptr allglen; + ForceGCState forcegc; + + void mstart(void); + static void runqput(P*, G*); + static G* runqget(P*); + static bool runqputslow(P*, G*, uint32, uint32); + static G* runqsteal(P*, P*); + static void mput(M*); + static M* mget(void); + static void mcommoninit(M*); + static void schedule(void); + static void procresize(int32); + static void acquirep(P*); + static P* releasep(void); + static void newm(void(*)(void), P*); + static void stopm(void); + static void startm(P*, bool); + static void handoffp(P*); + static void wakep(void); + static void stoplockedm(void); + static void startlockedm(G*); + static void sysmon(void); + static uint32 retake(int64); + static void incidlelocked(int32); + static void checkdead(void); + static void exitsyscall0(G*); + void park_m(G*); + static void goexit0(G*); + static void gfput(P*, G*); + static G* gfget(P*); + static void gfpurge(P*); + static void globrunqput(G*); + static void globrunqputbatch(G*, G*, int32); + static G* globrunqget(P*, int32); + static P* pidleget(void); + static void pidleput(P*); + static void injectglist(G*); + static bool preemptall(void); + static bool preemptone(P*); + static bool exitsyscallfast(void); + static bool haveexperiment(int8*); + void allgadd(G*); + static void dropg(void); + + extern String buildVersion; + */ + + // The bootstrap sequence is: + // + // call osinit + // call schedinit + // make & queue new G + // call runtime·mstart + // + // The new G calls runtime·main. + func schedinit() { + // raceinit must be the first call to race detector. + // In particular, it must be done before mallocinit below calls racemapshadow. + _g_ := getg() + if raceenabled { + _g_.racectx = raceinit() + } + + sched.maxmcount = 10000 + + tracebackinit() + symtabinit() + stackinit() + mallocinit() + mcommoninit(_g_.m) + + goargs() + goenvs() + parsedebugvars() + gcinit() + + sched.lastpoll = uint64(nanotime()) + procs := 1 + if n := goatoi(gogetenv("GOMAXPROCS")); n > 0 { + if n > _MaxGomaxprocs { + n = _MaxGomaxprocs + } + procs = n + } + procresize(int32(procs)) + + if buildVersion == "" { + // Condition should never trigger. This code just serves + // to ensure runtime·buildVersion is kept in the resulting binary. + buildVersion = "unknown" + } + } + + func newsysmon() { + _newm(sysmon, nil) + } + + func dumpgstatus(gp *g) { + _g_ := getg() + print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") + print("runtime: g: g=", _g_, ", goid=", _g_.goid, ", g->atomicstatus=", readgstatus(_g_), "\n") + } + + func checkmcount() { + // sched lock is held + if sched.mcount > sched.maxmcount { + print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") + gothrow("thread exhaustion") + } + } + + func mcommoninit(mp *m) { + _g_ := getg() + + // g0 stack won't make sense for user (and is not necessary unwindable). 
+ if _g_ != _g_.m.g0 { + callers(1, &mp.createstack[0], len(mp.createstack)) + } + + mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks()) + if mp.fastrand == 0 { + mp.fastrand = 0x49f6428a + } + + lock(&sched.lock) + mp.id = sched.mcount + sched.mcount++ + checkmcount() + mpreinit(mp) + if mp.gsignal != nil { + mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard + } + + // Add to allm so garbage collector doesn't free g->m + // when it is just in a register or thread-local storage. + mp.alllink = allm + + // NumCgoCall() iterates over allm w/o schedlock, + // so we need to publish it safely. + atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) + unlock(&sched.lock) + } + + // Mark gp ready to run. + func ready(gp *g) { + status := readgstatus(gp) + + // Mark runnable. + _g_ := getg() + _g_.m.locks++ // disable preemption because it can be holding p in a local var + if status&^_Gscan != _Gwaiting { + dumpgstatus(gp) + gothrow("bad g->status in ready") + } + + // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq + casgstatus(gp, _Gwaiting, _Grunnable) + runqput(_g_.m.p, gp) + if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic + wakep() + } + _g_.m.locks-- + if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack + _g_.stackguard0 = stackPreempt + } + } + + func gcprocs() int32 { + // Figure out how many CPUs to use during GC. + // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + lock(&sched.lock) + n := gomaxprocs + if n > ncpu { + n = ncpu + } + if n > _MaxGcproc { + n = _MaxGcproc + } + if n > sched.nmidle+1 { // one M is currently running + n = sched.nmidle + 1 + } + unlock(&sched.lock) + return n + } + + func needaddgcproc() bool { + lock(&sched.lock) + n := gomaxprocs + if n > ncpu { + n = ncpu + } + if n > _MaxGcproc { + n = _MaxGcproc + } + n -= sched.nmidle + 1 // one M is currently running + unlock(&sched.lock) + return n > 0 + } + + func helpgc(nproc int32) { + _g_ := getg() + lock(&sched.lock) + pos := 0 + for n := int32(1); n < nproc; n++ { // one M is currently running + if allp[pos].mcache == _g_.m.mcache { + pos++ + } + mp := mget() + if mp == nil { + gothrow("gcprocs inconsistency") + } + mp.helpgc = n + mp.mcache = allp[pos].mcache + pos++ + notewakeup(&mp.park) + } + unlock(&sched.lock) + } + + // Similar to stoptheworld but best-effort and can be called several times. + // There is no reverse operation, used during crashing. + // This function must not lock any mutexes. + func freezetheworld() { + if gomaxprocs == 1 { + return + } + // stopwait and preemption requests can be lost + // due to races with concurrently executing threads, + // so try several times + for i := 0; i < 5; i++ { + // this should tell the scheduler to not start any new goroutines + sched.stopwait = 0x7fffffff + atomicstore(&sched.gcwaiting, 1) + // this should stop running goroutines + if !preemptall() { + break // no running goroutines + } + usleep(1000) + } + // to be sure + usleep(1000) + preemptall() + usleep(1000) + } + + func isscanstatus(status uint32) bool { + if status == _Gscan { + gothrow("isscanstatus: Bad status Gscan") + } + return status&_Gscan == _Gscan + } + + // All reads and writes of g's status go through readgstatus, casgstatus + // castogscanstatus, casfrom_Gscanstatus. + //go:nosplit + func readgstatus(gp *g) uint32 { + return atomicload(&gp.atomicstatus) + } + + // The Gscanstatuses are acting like locks and this releases them. 
+ // If it proves to be a performance hit we should be able to make these + // simple atomic stores but for now we are going to throw if + // we see an inconsistent state. + func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { + success := false + + // Check that transition is valid. + switch oldval { + case _Gscanrunnable, + _Gscanwaiting, + _Gscanrunning, + _Gscansyscall: + if newval == oldval&^_Gscan { + success = cas(&gp.atomicstatus, oldval, newval) + } + case _Gscanenqueue: + if newval == _Gwaiting { + success = cas(&gp.atomicstatus, oldval, newval) + } + } + if !success { + print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") + dumpgstatus(gp) + gothrow("casfrom_Gscanstatus: gp->status is not in scan state") + } + } + + // This will return false if the gp is not in the expected status and the cas fails. + // This acts like a lock acquire while the casfromgstatus acts like a lock release. + func castogscanstatus(gp *g, oldval, newval uint32) bool { + switch oldval { + case _Grunnable, + _Gwaiting, + _Gsyscall: + if newval == oldval|_Gscan { + return cas(&gp.atomicstatus, oldval, newval) + } + case _Grunning: + if newval == _Gscanrunning || newval == _Gscanenqueue { + return cas(&gp.atomicstatus, oldval, newval) + } + } + print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n") + gothrow("castogscanstatus") + panic("not reached") + } + + // If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus + // and casfrom_Gscanstatus instead. + // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that + // put it in the Gscan state is finished. + //go:nosplit + func casgstatus(gp *g, oldval, newval uint32) { + if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval { + systemstack(func() { + print("casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n") + gothrow("casgstatus: bad incoming values") + }) + } + + // loop if gp->atomicstatus is in a scan state giving + // GC time to finish and change the state to oldval. + for !cas(&gp.atomicstatus, oldval, newval) { - // Help GC if needed. - if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { - gp.preemptscan = false - systemstack(func() { - gcphasework(gp) - }) - } + } + } + + // stopg ensures that gp is stopped at a GC safe point where its stack can be scanned + // or in the context of a moving collector the pointers can be flipped from pointing + // to old object to pointing to new objects. + // If stopg returns true, the caller knows gp is at a GC safe point and will remain there until + // the caller calls restartg. + // If stopg returns false, the caller is not responsible for calling restartg. This can happen + // if another thread, either the gp itself or another GC thread is taking the responsibility + // to do the GC work related to this thread. + func stopg(gp *g) bool { + for { + if gp.gcworkdone { + return false + } + + switch s := readgstatus(gp); s { + default: + dumpgstatus(gp) + gothrow("stopg: gp->atomicstatus is not valid") + + case _Gdead: + return false + + case _Gcopystack: + // Loop until a new stack is in place. + + case _Grunnable, + _Gsyscall, + _Gwaiting: + // Claim goroutine by setting scan bit. + if !castogscanstatus(gp, s, s|_Gscan) { + break + } + // In scan state, do work. 
+ gcphasework(gp) + return true + + case _Gscanrunnable, + _Gscanwaiting, + _Gscansyscall: + // Goroutine already claimed by another GC helper. + return false + + case _Grunning: ++ if gcphase == _GCscan { ++ // Running routines not scanned during ++ // GCscan phase, we only scan non-running routines. ++ gp.gcworkdone = true ++ return false ++ } ++ + // Claim goroutine, so we aren't racing with a status + // transition away from Grunning. + if !castogscanstatus(gp, _Grunning, _Gscanrunning) { + break + } + + // Mark gp for preemption. + if !gp.gcworkdone { + gp.preemptscan = true + gp.preempt = true + gp.stackguard0 = stackPreempt + } + + // Unclaim. + casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning) + return false + } + } + } + + // The GC requests that this routine be moved from a scanmumble state to a mumble state. + func restartg(gp *g) { + s := readgstatus(gp) + switch s { + default: + dumpgstatus(gp) + gothrow("restartg: unexpected status") + + case _Gdead: + // ok + + case _Gscanrunnable, + _Gscanwaiting, + _Gscansyscall: + casfrom_Gscanstatus(gp, s, s&^_Gscan) + + // Scan is now completed. + // Goroutine now needs to be made runnable. + // We put it on the global run queue; ready blocks on the global scheduler lock. + case _Gscanenqueue: + casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting) + if gp != getg().m.curg { + gothrow("processing Gscanenqueue on wrong m") + } + dropg() + ready(gp) + } + } + + func stopscanstart(gp *g) { + _g_ := getg() + if _g_ == gp { + gothrow("GC not moved to G0") + } + if stopg(gp) { + if !isscanstatus(readgstatus(gp)) { + dumpgstatus(gp) + gothrow("GC not in scan state") + } + restartg(gp) + } + } + + // Runs on g0 and does the actual work after putting the g back on the run queue. + func mquiesce(gpmaster *g) { - activeglen := len(allgs) + // enqueue the calling goroutine. + restartg(gpmaster) ++ ++ activeglen := len(allgs) + for i := 0; i < activeglen; i++ { + gp := allgs[i] + if readgstatus(gp) == _Gdead { + gp.gcworkdone = true // noop scan. + } else { + gp.gcworkdone = false + } + stopscanstart(gp) + } + + // Check that the G's gcwork (such as scanning) has been done. If not do it now. + // You can end up doing work here if the page trap on a Grunning Goroutine has + // not been sprung or in some race situations. For example a runnable goes dead + // and is started up again with a gp->gcworkdone set to false. + for i := 0; i < activeglen; i++ { + gp := allgs[i] + for !gp.gcworkdone { + status := readgstatus(gp) + if status == _Gdead { + //do nothing, scan not needed. + gp.gcworkdone = true // scan is a noop + break + } + if status == _Grunning && gp.stackguard0 == uintptr(stackPreempt) && notetsleep(&sched.stopnote, 100*1000) { // nanosecond arg + noteclear(&sched.stopnote) + } else { + stopscanstart(gp) + } + } + } + + for i := 0; i < activeglen; i++ { + gp := allgs[i] + status := readgstatus(gp) + if isscanstatus(status) { + print("mstopandscang:bottom: post scan bad status gp=", gp, " has status ", hex(status), "\n") + dumpgstatus(gp) + } + if !gp.gcworkdone && status != _Gdead { + print("mstopandscang:bottom: post scan gp=", gp, "->gcworkdone still false\n") + dumpgstatus(gp) + } + } + + schedule() // Never returns. + } + + // quiesce moves all the goroutines to a GC safepoint which for now is a at preemption point. + // If the global gcphase is GCmark quiesce will ensure that all of the goroutine's stacks + // have been scanned before it returns. 
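An aside, not part of the patch: the "scan bit as a lock" idea behind castogscanstatus and casfrom_Gscanstatus above. A GC helper claims a goroutine by CASing status to status|scanBit and releases it by CASing the bit back off; ordinary status transitions never set that bit, so they cannot race with a scan in progress. The status values below are invented stand-ins, not the runtime's constants.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	const (
		statRunnable uint32 = 1
		statWaiting  uint32 = 2
		statScan     uint32 = 0x1000 // the claim ("lock") bit
	)

	// claim is the acquire half: it succeeds only if status still equals oldval.
	func claim(status *uint32, oldval uint32) bool {
		return atomic.CompareAndSwapUint32(status, oldval, oldval|statScan)
	}

	// release is the release half: the status must not have changed underneath.
	func release(status *uint32, oldval uint32) {
		if !atomic.CompareAndSwapUint32(status, oldval|statScan, oldval) {
			panic("status changed while the scan bit was held")
		}
	}

	func main() {
		status := statWaiting
		if claim(&status, statWaiting) {
			// The goroutine is claimed: its stack could be scanned here.
			fmt.Printf("claimed: %#x\n", atomic.LoadUint32(&status))
			release(&status, statWaiting)
		}
		fmt.Printf("released: %#x\n", atomic.LoadUint32(&status))
	}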
+ func quiesce(mastergp *g) { + castogscanstatus(mastergp, _Grunning, _Gscanenqueue) + // Now move this to the g0 (aka m) stack. + // g0 will potentially scan this thread and put mastergp on the runqueue + mcall(mquiesce) + } + + // This is used by the GC as well as the routines that do stack dumps. In the case + // of GC all the routines can be reliably stopped. This is not always the case + // when the system is in panic or being exited. + func stoptheworld() { + _g_ := getg() + + // If we hold a lock, then we won't be able to stop another M + // that is blocked trying to acquire the lock. + if _g_.m.locks > 0 { + gothrow("stoptheworld: holding locks") + } + + lock(&sched.lock) + sched.stopwait = gomaxprocs + atomicstore(&sched.gcwaiting, 1) + preemptall() + // stop current P + _g_.m.p.status = _Pgcstop // Pgcstop is only diagnostic. + sched.stopwait-- + // try to retake all P's in Psyscall status + for i := 0; i < int(gomaxprocs); i++ { + p := allp[i] + s := p.status + if s == _Psyscall && cas(&p.status, s, _Pgcstop) { + sched.stopwait-- + } + } + // stop idle P's + for { + p := pidleget() + if p == nil { + break + } + p.status = _Pgcstop + sched.stopwait-- + } + wait := sched.stopwait > 0 + unlock(&sched.lock) + + // wait for remaining P's to stop voluntarily + if wait { + for { + // wait for 100us, then try to re-preempt in case of any races + if notetsleep(&sched.stopnote, 100*1000) { + noteclear(&sched.stopnote) + break + } + preemptall() + } + } + if sched.stopwait != 0 { + gothrow("stoptheworld: not stopped") + } + for i := 0; i < int(gomaxprocs); i++ { + p := allp[i] + if p.status != _Pgcstop { + gothrow("stoptheworld: not stopped") + } + } + } + + func mhelpgc() { + _g_ := getg() + _g_.m.helpgc = -1 + } + + func starttheworld() { + _g_ := getg() + + _g_.m.locks++ // disable preemption because it can be holding p in a local var + gp := netpoll(false) // non-blocking + injectglist(gp) + add := needaddgcproc() + lock(&sched.lock) + if newprocs != 0 { + procresize(newprocs) + newprocs = 0 + } else { + procresize(gomaxprocs) + } + sched.gcwaiting = 0 + + var p1 *p + for { + p := pidleget() + if p == nil { + break + } + // procresize() puts p's with work at the beginning of the list. + // Once we reach a p without a run queue, the rest don't have one either. + if p.runqhead == p.runqtail { + pidleput(p) + break + } + p.m = mget() + p.link = p1 + p1 = p + } + if sched.sysmonwait != 0 { + sched.sysmonwait = 0 + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + + for p1 != nil { + p := p1 + p1 = p1.link + if p.m != nil { + mp := p.m + p.m = nil + if mp.nextp != nil { + gothrow("starttheworld: inconsistent mp->nextp") + } + mp.nextp = p + notewakeup(&mp.park) + } else { + // Start M to run P. Do not start another M below. + _newm(nil, p) + add = false + } + } + + if add { + // If GC could have used another helper proc, start one now, + // in the hope that it will be available next time. + // It would have been even better to start it before the collection, + // but doing so requires allocating memory, so it's tricky to + // coordinate. This lazy approach works out in practice: + // we don't mind if the first couple gc rounds don't have quite + // the maximum number of procs. + _newm(mhelpgc, nil) + } + _g_.m.locks-- + if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack + _g_.stackguard0 = stackPreempt + } + } + + // Called to start an M. 
+ //go:nosplit + func mstart() { + _g_ := getg() + + if _g_.stack.lo == 0 { + // Initialize stack bounds from system stack. + // Cgo may have left stack size in stack.hi. + size := _g_.stack.hi + if size == 0 { + size = 8192 + } + _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size))) + _g_.stack.lo = _g_.stack.hi - size + 1024 + } + // Initialize stack guards so that we can start calling + // both Go and C functions with stack growth prologues. + _g_.stackguard0 = _g_.stack.lo + _StackGuard + _g_.stackguard1 = _g_.stackguard0 + mstart1() + } + + func mstart1() { + _g_ := getg() + + if _g_ != _g_.m.g0 { + gothrow("bad runtime·mstart") + } + + // Record top of stack for use by mcall. + // Once we call schedule we're never coming back, + // so other calls can reuse this stack space. + gosave(&_g_.m.g0.sched) + _g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used + asminit() + minit() + + // Install signal handlers; after minit so that minit can + // prepare the thread to be able to handle the signals. + if _g_.m == &m0 { + initsig() + } + + if _g_.m.mstartfn != nil { + fn := *(*func())(unsafe.Pointer(&_g_.m.mstartfn)) + fn() + } + + if _g_.m.helpgc != 0 { + _g_.m.helpgc = 0 + stopm() + } else if _g_.m != &m0 { + acquirep(_g_.m.nextp) + _g_.m.nextp = nil + } + schedule() + + // TODO(brainman): This point is never reached, because scheduler + // does not release os threads at the moment. But once this path + // is enabled, we must remove our seh here. + } + + // When running with cgo, we call _cgo_thread_start + // to start threads for us so that we can play nicely with + // foreign code. + var cgoThreadStart unsafe.Pointer + + type cgothreadstart struct { + g *g + tls *uint64 + fn unsafe.Pointer + } + + // Allocate a new m unassociated with any thread. + // Can use p for allocation context if needed. + func allocm(_p_ *p) *m { + _g_ := getg() + _g_.m.locks++ // disable GC because it can be called from sysmon + if _g_.m.p == nil { + acquirep(_p_) // temporarily borrow p for mallocs in this function + } + mp := newM() + mcommoninit(mp) + + // In case of cgo or Solaris, pthread_create will make us a stack. + // Windows and Plan 9 will layout sched stack on OS stack. + if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" { + mp.g0 = malg(-1) + } else { + mp.g0 = malg(8192) + } + mp.g0.m = mp + + if _p_ == _g_.m.p { + releasep() + } + _g_.m.locks-- + if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack + _g_.stackguard0 = stackPreempt + } + + return mp + } + + func allocg() *g { + return newG() + } + + // needm is called when a cgo callback happens on a + // thread without an m (a thread not created by Go). + // In this case, needm is expected to find an m to use + // and return with m, g initialized correctly. + // Since m and g are not set now (likely nil, but see below) + // needm is limited in what routines it can call. In particular + // it can only call nosplit functions (textflag 7) and cannot + // do any scheduling that requires an m. + // + // In order to avoid needing heavy lifting here, we adopt + // the following strategy: there is a stack of available m's + // that can be stolen. Using compare-and-swap + // to pop from the stack has ABA races, so we simulate + // a lock by doing an exchange (via casp) to steal the stack + // head and replace the top pointer with MLOCKED (1). + // This serves as a simple spin lock that we can use even + // without an m. 
The thread that locks the stack in this way + // unlocks the stack by storing a valid stack head pointer. + // + // In order to make sure that there is always an m structure + // available to be stolen, we maintain the invariant that there + // is always one more than needed. At the beginning of the + // program (if cgo is in use) the list is seeded with a single m. + // If needm finds that it has taken the last m off the list, its job + // is - once it has installed its own m so that it can do things like + // allocate memory - to create a spare m and put it on the list. + // + // Each of these extra m's also has a g0 and a curg that are + // pressed into service as the scheduling stack and current + // goroutine for the duration of the cgo callback. + // + // When the callback is done with the m, it calls dropm to + // put the m back on the list. + //go:nosplit + func needm(x byte) { + if needextram != 0 { + // Can happen if C/C++ code calls Go from a global ctor. + // Can not throw, because scheduler is not initialized yet. + // XXX + // write(2, unsafe.Pointer("fatal error: cgo callback before cgo call\n"), sizeof("fatal error: cgo callback before cgo call\n") - 1) + exit(1) + } + + // Lock extra list, take head, unlock popped list. + // nilokay=false is safe here because of the invariant above, + // that the extra list always contains or will soon contain + // at least one m. + mp := lockextra(false) + + // Set needextram when we've just emptied the list, + // so that the eventual call into cgocallbackg will + // allocate a new m for the extra list. We delay the + // allocation until then so that it can be done + // after exitsyscall makes sure it is okay to be + // running at all (that is, there's no garbage collection + // running right now). + mp.needextram = mp.schedlink == nil + unlockextra(mp.schedlink) + + // Install g (= m->g0) and set the stack bounds + // to match the current stack. We don't actually know + // how big the stack is, like we don't know how big any + // scheduling stack is, but we assume there's at least 32 kB, + // which is more than enough for us. + setg(mp.g0) + _g_ := getg() + _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024 + _g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024 + _g_.stackguard0 = _g_.stack.lo + _StackGuard + + // Initialize this thread to use the m. + asminit() + minit() + } + + // newextram allocates an m and puts it on the extra list. + // It is called with a working local m, so that it can do things + // like call schedlock and allocate. + func newextram() { + // Create extra goroutine locked to extra m. + // The goroutine is the context in which the cgo callback will run. + // The sched.pc will never be returned to, but setting it to + // goexit makes clear to the traceback routines where + // the goroutine stack ends. + mp := allocm(nil) + gp := malg(4096) + gp.sched.pc = funcPC(goexit) + _PCQuantum + gp.sched.sp = gp.stack.hi + gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame + gp.sched.lr = 0 + gp.sched.g = gp + gp.syscallpc = gp.sched.pc + gp.syscallsp = gp.sched.sp + // malg returns status as Gidle, change to Gsyscall before adding to allg + // where GC will see it. 
+ casgstatus(gp, _Gidle, _Gsyscall) + gp.m = mp + mp.curg = gp + mp.locked = _LockInternal + mp.lockedg = gp + gp.lockedm = mp + gp.goid = int64(xadd64(&sched.goidgen, 1)) + if raceenabled { + gp.racectx = racegostart(funcPC(newextram)) + } + // put on allg for garbage collector + allgadd(gp) + + // Add m to the extra list. + mnext := lockextra(true) + mp.schedlink = mnext + unlockextra(mp) + } + + // dropm is called when a cgo callback has called needm but is now + // done with the callback and returning back into the non-Go thread. + // It puts the current m back onto the extra list. + // + // The main expense here is the call to signalstack to release the + // m's signal stack, and then the call to needm on the next callback + // from this thread. It is tempting to try to save the m for next time, + // which would eliminate both these costs, but there might not be + // a next time: the current thread (which Go does not control) might exit. + // If we saved the m for that thread, there would be an m leak each time + // such a thread exited. Instead, we acquire and release an m on each + // call. These should typically not be scheduling operations, just a few + // atomics, so the cost should be small. + // + // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread + // variable using pthread_key_create. Unlike the pthread keys we already use + // on OS X, this dummy key would never be read by Go code. It would exist + // only so that we could register at thread-exit-time destructor. + // That destructor would put the m back onto the extra list. + // This is purely a performance optimization. The current version, + // in which dropm happens on each cgo call, is still correct too. + // We may have to keep the current version on systems with cgo + // but without pthreads, like Windows. + func dropm() { + // Undo whatever initialization minit did during needm. + unminit() + + // Clear m and g, and return m to the extra list. + // After the call to setmg we can only call nosplit functions. + mp := getg().m + setg(nil) + + mnext := lockextra(true) + mp.schedlink = mnext + unlockextra(mp) + } + + var extram uintptr + + // lockextra locks the extra list and returns the list head. + // The caller must unlock the list by storing a new list head + // to extram. If nilokay is true, then lockextra will + // return a nil list head if that's what it finds. If nilokay is false, + // lockextra will keep waiting until the list head is no longer nil. + //go:nosplit + func lockextra(nilokay bool) *m { + const locked = 1 + + for { + old := atomicloaduintptr(&extram) + if old == locked { + yield := osyield + yield() + continue + } + if old == 0 && !nilokay { + usleep(1) + continue + } + if casuintptr(&extram, old, locked) { + return (*m)(unsafe.Pointer(old)) + } + yield := osyield + yield() + continue + } + } + + //go:nosplit + func unlockextra(mp *m) { + atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp))) + } + + // Create a new m. It will start off with a call to fn, or else the scheduler. 
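An aside on lockextra/unlockextra above, not part of the patch: a list head word that doubles as a spin lock by parking the sentinel value 1 in it while the list is edited. All names below are invented. Note that ordinary Go code must keep real references to the nodes somewhere (the keep slice here); the runtime can hide m's behind a uintptr only because extra m's are never freed.

	package main

	import (
		"fmt"
		"runtime"
		"sync/atomic"
		"unsafe"
	)

	type node struct {
		next *node
		id   int
	}

	const locked uintptr = 1

	var (
		head uintptr // 0 = empty list, 1 = locked, otherwise a *node
		keep []*node // keeps the nodes reachable for the garbage collector
	)

	// lockList claims the head word and returns the list it found.
	func lockList() *node {
		for {
			old := atomic.LoadUintptr(&head)
			if old == locked {
				runtime.Gosched() // someone else is editing the list; try again
				continue
			}
			if atomic.CompareAndSwapUintptr(&head, old, locked) {
				return (*node)(unsafe.Pointer(old))
			}
		}
	}

	// unlockList publishes the new head, releasing the claim.
	func unlockList(n *node) {
		atomic.StoreUintptr(&head, uintptr(unsafe.Pointer(n)))
	}

	func main() {
		for i := 1; i <= 2; i++ { // push two nodes
			n := &node{id: i}
			keep = append(keep, n)
			n.next = lockList()
			unlockList(n)
		}
		top := lockList() // pop one
		unlockList(top.next)
		fmt.Println("popped node", top.id)
	}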
+ func _newm(fn func(), _p_ *p) { + mp := allocm(_p_) + mp.nextp = _p_ + mp.mstartfn = *(*unsafe.Pointer)(unsafe.Pointer(&fn)) + + if iscgo { + var ts cgothreadstart + if _cgo_thread_start == nil { + gothrow("_cgo_thread_start missing") + } + ts.g = mp.g0 + ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0])) + ts.fn = unsafe.Pointer(funcPC(mstart)) + asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts)) + return + } + newosproc(mp, unsafe.Pointer(mp.g0.stack.hi)) + } + + // Stops execution of the current m until new work is available. + // Returns with acquired P. + func stopm() { + _g_ := getg() + + if _g_.m.locks != 0 { + gothrow("stopm holding locks") + } + if _g_.m.p != nil { + gothrow("stopm holding p") + } + if _g_.m.spinning { + _g_.m.spinning = false + xadd(&sched.nmspinning, -1) + } + + retry: + lock(&sched.lock) + mput(_g_.m) + unlock(&sched.lock) + notesleep(&_g_.m.park) + noteclear(&_g_.m.park) + if _g_.m.helpgc != 0 { + gchelper() + _g_.m.helpgc = 0 + _g_.m.mcache = nil + goto retry + } + acquirep(_g_.m.nextp) + _g_.m.nextp = nil + } + + func mspinning() { + getg().m.spinning = true + } + + // Schedules some M to run the p (creates an M if necessary). + // If p==nil, tries to get an idle P, if no idle P's does nothing. + func startm(_p_ *p, spinning bool) { + lock(&sched.lock) + if _p_ == nil { + _p_ = pidleget() + if _p_ == nil { + unlock(&sched.lock) + if spinning { + xadd(&sched.nmspinning, -1) + } + return + } + } + mp := mget() + unlock(&sched.lock) + if mp == nil { + var fn func() + if spinning { + fn = mspinning + } + _newm(fn, _p_) + return + } + if mp.spinning { + gothrow("startm: m is spinning") + } + if mp.nextp != nil { + gothrow("startm: m has p") + } + mp.spinning = spinning + mp.nextp = _p_ + notewakeup(&mp.park) + } + + // Hands off P from syscall or locked M. + func handoffp(_p_ *p) { + // if it has local work, start it straight away + if _p_.runqhead != _p_.runqtail || sched.runqsize != 0 { + startm(_p_, false) + return + } + // no local work, check that there are no spinning/idle M's, + // otherwise our help is not required + if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic + startm(_p_, true) + return + } + lock(&sched.lock) + if sched.gcwaiting != 0 { + _p_.status = _Pgcstop + sched.stopwait-- + if sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + unlock(&sched.lock) + return + } + if sched.runqsize != 0 { + unlock(&sched.lock) + startm(_p_, false) + return + } + // If this is the last running P and nobody is polling network, + // need to wakeup another M to poll network. + if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 { + unlock(&sched.lock) + startm(_p_, false) + return + } + pidleput(_p_) + unlock(&sched.lock) + } + + // Tries to add one more P to execute G's. + // Called when a G is made runnable (newproc, ready). + func wakep() { + // be conservative about spinning threads + if !cas(&sched.nmspinning, 0, 1) { + return + } + startm(nil, true) + } + + // Stops execution of the current m that is locked to a g until the g is runnable again. + // Returns with acquired P. + func stoplockedm() { + _g_ := getg() + + if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m { + gothrow("stoplockedm: inconsistent locking") + } + if _g_.m.p != nil { + // Schedule another M to run this p. + _p_ := releasep() + handoffp(_p_) + } + incidlelocked(1) + // Wait until another thread schedules lockedg again. 
+ notesleep(&_g_.m.park) + noteclear(&_g_.m.park) + status := readgstatus(_g_.m.lockedg) + if status&^_Gscan != _Grunnable { + print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") + dumpgstatus(_g_) + gothrow("stoplockedm: not runnable") + } + acquirep(_g_.m.nextp) + _g_.m.nextp = nil + } + + // Schedules the locked m to run the locked gp. + func startlockedm(gp *g) { + _g_ := getg() + + mp := gp.lockedm + if mp == _g_.m { + gothrow("startlockedm: locked to me") + } + if mp.nextp != nil { + gothrow("startlockedm: m has p") + } + // directly handoff current P to the locked m + incidlelocked(-1) + _p_ := releasep() + mp.nextp = _p_ + notewakeup(&mp.park) + stopm() + } + + // Stops the current m for stoptheworld. + // Returns when the world is restarted. + func gcstopm() { + _g_ := getg() + + if sched.gcwaiting == 0 { + gothrow("gcstopm: not waiting for gc") + } + if _g_.m.spinning { + _g_.m.spinning = false + xadd(&sched.nmspinning, -1) + } + _p_ := releasep() + lock(&sched.lock) + _p_.status = _Pgcstop + sched.stopwait-- + if sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + unlock(&sched.lock) + stopm() + } + + // Schedules gp to run on the current M. + // Never returns. + func execute(gp *g) { + _g_ := getg() + + casgstatus(gp, _Grunnable, _Grunning) + gp.waitsince = 0 + gp.preempt = false + gp.stackguard0 = gp.stack.lo + _StackGuard + _g_.m.p.schedtick++ + _g_.m.curg = gp + gp.m = _g_.m + + // Check whether the profiler needs to be turned on or off. + hz := sched.profilehz + if _g_.m.profilehz != hz { + resetcpuprofiler(hz) + } + + gogo(&gp.sched) + } + + // Finds a runnable goroutine to execute. + // Tries to steal from other P's, get g from global queue, poll network. + func findrunnable() *g { + _g_ := getg() + + top: + if sched.gcwaiting != 0 { + gcstopm() + goto top + } + if fingwait && fingwake { + if gp := wakefing(); gp != nil { + ready(gp) + } + } + + // local runq + if gp := runqget(_g_.m.p); gp != nil { + return gp + } + + // global runq + if sched.runqsize != 0 { + lock(&sched.lock) + gp := globrunqget(_g_.m.p, 0) + unlock(&sched.lock) + if gp != nil { + return gp + } + } + + // poll network - returns list of goroutines + if gp := netpoll(false); gp != nil { // non-blocking + injectglist(gp.schedlink) + casgstatus(gp, _Gwaiting, _Grunnable) + return gp + } + + // If number of spinning M's >= number of busy P's, block. + // This is necessary to prevent excessive CPU consumption + // when GOMAXPROCS>>1 but the program parallelism is low. 
+ if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic + goto stop + } + if !_g_.m.spinning { + _g_.m.spinning = true + xadd(&sched.nmspinning, 1) + } + // random steal from other P's + for i := 0; i < int(2*gomaxprocs); i++ { + if sched.gcwaiting != 0 { + goto top + } + _p_ := allp[fastrand1()%uint32(gomaxprocs)] + var gp *g + if _p_ == _g_.m.p { + gp = runqget(_p_) + } else { + gp = runqsteal(_g_.m.p, _p_) + } + if gp != nil { + return gp + } + } + stop: + + // return P and block + lock(&sched.lock) + if sched.gcwaiting != 0 { + unlock(&sched.lock) + goto top + } + if sched.runqsize != 0 { + gp := globrunqget(_g_.m.p, 0) + unlock(&sched.lock) + return gp + } + _p_ := releasep() + pidleput(_p_) + unlock(&sched.lock) + if _g_.m.spinning { + _g_.m.spinning = false + xadd(&sched.nmspinning, -1) + } + + // check all runqueues once again + for i := 0; i < int(gomaxprocs); i++ { + _p_ := allp[i] + if _p_ != nil && _p_.runqhead != _p_.runqtail { + lock(&sched.lock) + _p_ = pidleget() + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + goto top + } + break + } + } + + // poll network + if xchg64(&sched.lastpoll, 0) != 0 { + if _g_.m.p != nil { + gothrow("findrunnable: netpoll with p") + } + if _g_.m.spinning { + gothrow("findrunnable: netpoll with spinning") + } + gp := netpoll(true) // block until new work is available + atomicstore64(&sched.lastpoll, uint64(nanotime())) + if gp != nil { + lock(&sched.lock) + _p_ = pidleget() + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + injectglist(gp.schedlink) + casgstatus(gp, _Gwaiting, _Grunnable) + return gp + } + injectglist(gp) + } + } + stopm() + goto top + } + + func resetspinning() { + _g_ := getg() + + var nmspinning uint32 + if _g_.m.spinning { + _g_.m.spinning = false + nmspinning = xadd(&sched.nmspinning, -1) + if nmspinning < 0 { + gothrow("findrunnable: negative nmspinning") + } + } else { + nmspinning = atomicload(&sched.nmspinning) + } + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another P here. + if nmspinning == 0 && atomicload(&sched.npidle) > 0 { + wakep() + } + } + + // Injects the list of runnable G's into the scheduler. + // Can run concurrently with GC. + func injectglist(glist *g) { + if glist == nil { + return + } + lock(&sched.lock) + var n int + for n = 0; glist != nil; n++ { + gp := glist + glist = gp.schedlink + casgstatus(gp, _Gwaiting, _Grunnable) + globrunqput(gp) + } + unlock(&sched.lock) + for ; n != 0 && sched.npidle != 0; n-- { + startm(nil, false) + } + } + + // One round of scheduler: find a runnable goroutine and execute it. + // Never returns. + func schedule() { + _g_ := getg() + + if _g_.m.locks != 0 { + gothrow("schedule: holding locks") + } + + if _g_.m.lockedg != nil { + stoplockedm() + execute(_g_.m.lockedg) // Never returns. + } + + top: + if sched.gcwaiting != 0 { + gcstopm() + goto top + } + + var gp *g + // Check the global runnable queue once in a while to ensure fairness. + // Otherwise two goroutines can completely occupy the local runqueue + // by constantly respawning each other. + tick := _g_.m.p.schedtick + // This is a fancy way to say tick%61==0, + // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. 
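An aside, not part of the patch: checking that the multiply-and-shift in the line that follows really is "a fancy way to say tick%61==0". 0x4325c53f is ceil(2^36/61), so for any 32-bit tick, (tick*0x4325c53f)>>36 equals tick/61, and subtracting (tick/61)*61 leaves tick%61.

	package main

	import "fmt"

	func fancyMod61(tick uint32) uint64 {
		t := uint64(tick)
		return t - ((t*0x4325c53f)>>36)*61
	}

	func main() {
		check := func(tick uint32) {
			if fancyMod61(tick) != uint64(tick%61) {
				panic(fmt.Sprintf("mismatch at tick=%d", tick))
			}
		}
		for i := uint32(0); i < 1000000; i++ {
			check(i)              // small ticks
			check(^uint32(0) - i) // ticks near 2^32-1
		}
		for i := uint64(0); i < 1<<32; i += 12345 {
			check(uint32(i)) // coarse sweep over the whole range
		}
		fmt.Println("tick - (tick*0x4325c53f>>36)*61 == tick%61 on all sampled ticks")
	}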
+ if uint64(tick)-((uint64(tick)*0x4325c53f)>>36)*61 == 0 && sched.runqsize > 0 { + lock(&sched.lock) + gp = globrunqget(_g_.m.p, 1) + unlock(&sched.lock) + if gp != nil { + resetspinning() + } + } + if gp == nil { + gp = runqget(_g_.m.p) + if gp != nil && _g_.m.spinning { + gothrow("schedule: spinning with local work") + } + } + if gp == nil { + gp = findrunnable() // blocks until work is available + resetspinning() + } + + if gp.lockedm != nil { + // Hands off own p to the locked m, + // then blocks waiting for a new p. + startlockedm(gp) + goto top + } + + execute(gp) + } + + // dropg removes the association between m and the current goroutine m->curg (gp for short). + // Typically a caller sets gp's status away from Grunning and then + // immediately calls dropg to finish the job. The caller is also responsible + // for arranging that gp will be restarted using ready at an + // appropriate time. After calling dropg and arranging for gp to be + // readied later, the caller can do other work but eventually should + // call schedule to restart the scheduling of goroutines on this m. + func dropg() { + _g_ := getg() + + if _g_.m.lockedg == nil { + _g_.m.curg.m = nil + _g_.m.curg = nil + } + } + + // Puts the current goroutine into a waiting state and calls unlockf. + // If unlockf returns false, the goroutine is resumed. + func park(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) { + _g_ := getg() + + _g_.m.waitlock = lock + _g_.m.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf)) + _g_.waitreason = reason + mcall(park_m) + } + + func parkunlock_c(gp *g, lock unsafe.Pointer) bool { + unlock((*mutex)(lock)) + return true + } + + // Puts the current goroutine into a waiting state and unlocks the lock. + // The goroutine can be made runnable again by calling ready(gp). + func parkunlock(lock *mutex, reason string) { + park(parkunlock_c, unsafe.Pointer(lock), reason) + } + + // park continuation on g0. + func park_m(gp *g) { + _g_ := getg() + + casgstatus(gp, _Grunning, _Gwaiting) + dropg() + + if _g_.m.waitunlockf != nil { + fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf)) + ok := fn(gp, _g_.m.waitlock) + _g_.m.waitunlockf = nil + _g_.m.waitlock = nil + if !ok { + casgstatus(gp, _Gwaiting, _Grunnable) + execute(gp) // Schedule it back, never returns. + } + } + schedule() + } + + // Gosched continuation on g0. + func gosched_m(gp *g) { + status := readgstatus(gp) + if status&^_Gscan != _Grunning { + dumpgstatus(gp) + gothrow("bad g status") + } + casgstatus(gp, _Grunning, _Grunnable) + dropg() + lock(&sched.lock) + globrunqput(gp) + unlock(&sched.lock) + + schedule() + } + + // Finishes execution of the current goroutine. + // Must be NOSPLIT because it is called from Go. (TODO - probably not anymore) + //go:nosplit + func goexit1() { + if raceenabled { + racegoend() + } + mcall(goexit0) + } + + // goexit continuation on g0. + func goexit0(gp *g) { + _g_ := getg() + + casgstatus(gp, _Grunning, _Gdead) + gp.m = nil + gp.lockedm = nil + _g_.m.lockedg = nil + gp.paniconfault = false + gp._defer = nil // should be true already but just in case. + gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. 
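An aside on park and park_m above: the waitunlockf callback is stored in an untyped unsafe.Pointer field and reinterpreted back to its func type before the call. A minimal sketch of that round trip with toy names follows; it leans on the gc toolchain's func-value representation and is not portable, safe Go.

package main

import (
	"fmt"
	"unsafe"
)

type waiter struct {
	unlockf unsafe.Pointer // holds a func(int) bool, like m.waitunlockf holds func(*g, unsafe.Pointer) bool
}

func main() {
	f := func(x int) bool { return x > 0 }

	var w waiter
	// Store: reinterpret the func variable's word as an unsafe.Pointer.
	w.unlockf = *(*unsafe.Pointer)(unsafe.Pointer(&f))

	// Recover: reinterpret the stored word back as the original func type.
	got := *(*func(int) bool)(unsafe.Pointer(&w.unlockf))
	fmt.Println(got(3), got(-1)) // true false
}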
+ gp.writebuf = nil + gp.waitreason = "" + gp.param = nil + + dropg() + + if _g_.m.locked&^_LockExternal != 0 { + print("invalid m->locked = ", _g_.m.locked, "\n") + gothrow("internal lockOSThread error") + } + _g_.m.locked = 0 + gfput(_g_.m.p, gp) + schedule() + } + + //go:nosplit + func save(pc, sp uintptr) { + _g_ := getg() + + _g_.sched.pc = pc + _g_.sched.sp = sp + _g_.sched.lr = 0 + _g_.sched.ret = 0 + _g_.sched.ctxt = nil - _g_.sched.g = _g_ ++ // write as uintptr to avoid write barrier, which will smash _g_.sched. ++ *(*uintptr)(unsafe.Pointer(&_g_.sched.g)) = uintptr(unsafe.Pointer(_g_)) + } + + // The goroutine g is about to enter a system call. + // Record that it's not using the cpu anymore. + // This is called only from the go syscall library and cgocall, + // not from the low-level system calls used by the + // + // Entersyscall cannot split the stack: the gosave must + // make g->sched refer to the caller's stack segment, because + // entersyscall is going to return immediately after. + // + // Nothing entersyscall calls can split the stack either. + // We cannot safely move the stack during an active call to syscall, + // because we do not know which of the uintptr arguments are + // really pointers (back into the stack). + // In practice, this means that we make the fast path run through + // entersyscall doing no-split things, and the slow path has to use systemstack + // to run bigger things on the system stack. + // + // reentersyscall is the entry point used by cgo callbacks, where explicitly + // saved SP and PC are restored. This is needed when exitsyscall will be called + // from a function further up in the call stack than the parent, as g->syscallsp + // must always point to a valid stack frame. entersyscall below is the normal + // entry point for syscalls, which obtains the SP and PC from the caller. + //go:nosplit + func reentersyscall(pc, sp uintptr) { + _g_ := getg() + + // Disable preemption because during this function g is in Gsyscall status, + // but can have inconsistent g->sched, do not let GC observe it. + _g_.m.locks++ + + // Entersyscall must not call any function that might split/grow the stack. + // (See details in comment above.) + // Catch calls that might, by replacing the stack guard with something that + // will trip any stack check and leaving a flag to tell newstack to die. + _g_.stackguard0 = stackPreempt + _g_.throwsplit = true + + // Leave SP around for GC and traceback. + save(pc, sp) + _g_.syscallsp = sp + _g_.syscallpc = pc + casgstatus(_g_, _Grunning, _Gsyscall) + if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { - systemstack(entersyscall_bad) ++ systemstack(func() { ++ print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") ++ gothrow("entersyscall") ++ }) + } + + if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic + systemstack(entersyscall_sysmon) + save(pc, sp) + } + + _g_.m.mcache = nil + _g_.m.p.m = nil + atomicstore(&_g_.m.p.status, _Psyscall) + if sched.gcwaiting != 0 { + systemstack(entersyscall_gcwait) + save(pc, sp) + } + + // Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched). + // We set _StackGuard to StackPreempt so that first split stack check calls morestack. + // Morestack detects this case and throws. + _g_.stackguard0 = stackPreempt + _g_.m.locks-- + } + + // Standard syscall entry used by the go syscall library and normal cgo calls. 
+ //go:nosplit + func entersyscall(dummy int32) { + reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) + } + -func entersyscall_bad() { - var gp *g - gp = getg().m.curg - print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") - gothrow("entersyscall") -} - + func entersyscall_sysmon() { + lock(&sched.lock) + if atomicload(&sched.sysmonwait) != 0 { + atomicstore(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + } + + func entersyscall_gcwait() { + _g_ := getg() + + lock(&sched.lock) + if sched.stopwait > 0 && cas(&_g_.m.p.status, _Psyscall, _Pgcstop) { + if sched.stopwait--; sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + } + unlock(&sched.lock) + } + + // The same as entersyscall(), but with a hint that the syscall is blocking. + //go:nosplit + func entersyscallblock(dummy int32) { + _g_ := getg() + + _g_.m.locks++ // see comment in entersyscall + _g_.throwsplit = true + _g_.stackguard0 = stackPreempt // see comment in entersyscall + + // Leave SP around for GC and traceback. - save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) ++ pc := getcallerpc(unsafe.Pointer(&dummy)) ++ sp := getcallersp(unsafe.Pointer(&dummy)) ++ save(pc, sp) + _g_.syscallsp = _g_.sched.sp + _g_.syscallpc = _g_.sched.pc ++ if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { ++ sp1 := sp ++ sp2 := _g_.sched.sp ++ sp3 := _g_.syscallsp ++ systemstack(func() { ++ print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") ++ gothrow("entersyscallblock") ++ }) ++ } + casgstatus(_g_, _Grunning, _Gsyscall) + if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { - systemstack(entersyscall_bad) ++ systemstack(func() { ++ print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") ++ gothrow("entersyscallblock") ++ }) + } + + systemstack(entersyscallblock_handoff) + + // Resave for traceback during blocked call. + save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) + + _g_.m.locks-- + } + + func entersyscallblock_handoff() { + handoffp(releasep()) + } + + // The goroutine g exited its system call. + // Arrange for it to run on a cpu again. + // This is called only from the go syscall library, not + // from the low-level system calls used by the + //go:nosplit + func exitsyscall(dummy int32) { + _g_ := getg() + + _g_.m.locks++ // see comment in entersyscall + if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp { + gothrow("exitsyscall: syscall frame is no longer valid") + } + + _g_.waitsince = 0 + if exitsyscallfast() { + if _g_.m.mcache == nil { + gothrow("lost mcache") + } + // There's a cpu for us, so we can run. + _g_.m.p.syscalltick++ + // We need to cas the status and scan before resuming... + casgstatus(_g_, _Gsyscall, _Grunning) + + // Garbage collector isn't running (since we are), + // so okay to clear syscallsp. + _g_.syscallsp = 0 + _g_.m.locks-- + if _g_.preempt { + // restore the preemption request in case we've cleared it in newstack + _g_.stackguard0 = stackPreempt + } else { + // otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock + _g_.stackguard0 = _g_.stack.lo + _StackGuard + } + _g_.throwsplit = false + return + } + + _g_.m.locks-- + + // Call the scheduler. 
+ mcall(exitsyscall0) + + if _g_.m.mcache == nil { + gothrow("lost mcache") + } + + // Scheduler returned, so we're allowed to run now. + // Delete the syscallsp information that we left for + // the garbage collector during the system call. + // Must wait until now because until gosched returns + // we don't know for sure that the garbage collector + // is not running. + _g_.syscallsp = 0 + _g_.m.p.syscalltick++ + _g_.throwsplit = false + } + + //go:nosplit + func exitsyscallfast() bool { + _g_ := getg() + + // Freezetheworld sets stopwait but does not retake P's. + if sched.stopwait != 0 { ++ _g_.m.mcache = nil + _g_.m.p = nil + return false + } + + // Try to re-acquire the last P. + if _g_.m.p != nil && _g_.m.p.status == _Psyscall && cas(&_g_.m.p.status, _Psyscall, _Prunning) { + // There's a cpu for us, so we can run. + _g_.m.mcache = _g_.m.p.mcache + _g_.m.p.m = _g_.m + return true + } + + // Try to get any other idle P. ++ _g_.m.mcache = nil + _g_.m.p = nil + if sched.pidle != nil { + var ok bool + systemstack(func() { + ok = exitsyscallfast_pidle() + }) + if ok { + return true + } + } + return false + } + + func exitsyscallfast_pidle() bool { + lock(&sched.lock) + _p_ := pidleget() + if _p_ != nil && atomicload(&sched.sysmonwait) != 0 { + atomicstore(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + return true + } + return false + } + + // exitsyscall slow path on g0. + // Failed to acquire P, enqueue gp as runnable. + func exitsyscall0(gp *g) { + _g_ := getg() + + casgstatus(gp, _Gsyscall, _Grunnable) + dropg() + lock(&sched.lock) + _p_ := pidleget() + if _p_ == nil { + globrunqput(gp) + } else if atomicload(&sched.sysmonwait) != 0 { + atomicstore(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + execute(gp) // Never returns. + } + if _g_.m.lockedg != nil { + // Wait until another thread schedules gp and so m again. + stoplockedm() + execute(gp) // Never returns. + } + stopm() + schedule() // Never returns. + } + + func beforefork() { + gp := getg().m.curg + + // Fork can hang if preempted with signals frequently enough (see issue 5517). + // Ensure that we stay on the same M where we disable profiling. + gp.m.locks++ + if gp.m.profilehz != 0 { + resetcpuprofiler(0) + } + + // This function is called before fork in syscall package. + // Code between fork and exec must not allocate memory nor even try to grow stack. + // Here we spoil g->_StackGuard to reliably detect any attempts to grow stack. + // runtime_AfterFork will undo this in parent process, but not in child. + gp.stackguard0 = stackFork + } + + // Called from syscall package before fork. + //go:nosplit + func syscall_BeforeFork() { + systemstack(beforefork) + } + + func afterfork() { + gp := getg().m.curg + + // See the comment in beforefork. + gp.stackguard0 = gp.stack.lo + _StackGuard + + hz := sched.profilehz + if hz != 0 { + resetcpuprofiler(hz) + } + gp.m.locks-- + } + + // Called from syscall package after fork in parent. + //go:nosplit + func syscall_AfterFork() { + systemstack(afterfork) + } + + // Allocate a new g, with a stack big enough for stacksize bytes. 
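Back in exitsyscallfast above, the whole fast path hinges on one compare-and-swap of the P's status. A toy sketch of that race, with hypothetical types and constants rather than the runtime's:

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	pIdle uint32 = iota
	pRunning
	pSyscall
)

type proc struct{ status uint32 }

// reacquire succeeds only if the P is still parked in the syscall state;
// if sysmon already retook it, the CAS fails and the caller must find
// another P or queue itself globally.
func reacquire(p *proc) bool {
	return atomic.CompareAndSwapUint32(&p.status, pSyscall, pRunning)
}

func main() {
	p := &proc{status: pSyscall}
	fmt.Println(reacquire(p)) // true: we won the race
	fmt.Println(reacquire(p)) // false: the status is no longer pSyscall
}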
+ func malg(stacksize int32) *g { + newg := allocg() + if stacksize >= 0 { + stacksize = round2(_StackSystem + stacksize) + systemstack(func() { + newg.stack = stackalloc(uint32(stacksize)) + }) + newg.stackguard0 = newg.stack.lo + _StackGuard + newg.stackguard1 = ^uintptr(0) + } + return newg + } + + // Create a new g running fn with siz bytes of arguments. + // Put it on the queue of g's waiting to run. + // The compiler turns a go statement into a call to this. + // Cannot split the stack because it assumes that the arguments + // are available sequentially after &fn; they would not be + // copied if a stack split occurred. + //go:nosplit + func newproc(siz int32, fn *funcval) { + argp := add(unsafe.Pointer(&fn), ptrSize) + if hasLinkRegister { + argp = add(argp, ptrSize) // skip caller's saved LR + } + + pc := getcallerpc(unsafe.Pointer(&siz)) + systemstack(func() { + newproc1(fn, (*uint8)(argp), siz, 0, pc) + }) + } + + // Create a new g running fn with narg bytes of arguments starting + // at argp and returning nret bytes of results. callerpc is the + // address of the go statement that created this. The new g is put + // on the queue of g's waiting to run. + func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g { + _g_ := getg() + + if fn == nil { + _g_.m.throwing = -1 // do not dump full stacks + gothrow("go of nil func value") + } + _g_.m.locks++ // disable preemption because it can be holding p in a local var + siz := narg + nret + siz = (siz + 7) &^ 7 + + // We could allocate a larger initial stack if necessary. + // Not worth it: this is almost always an error. + // 4*sizeof(uintreg): extra space added below + // sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall). + if siz >= _StackMin-4*regSize-regSize { + gothrow("newproc: function arguments too large for new goroutine") + } + + _p_ := _g_.m.p + newg := gfget(_p_) + if newg == nil { + newg = malg(_StackMin) + casgstatus(newg, _Gidle, _Gdead) + allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. + } + if newg.stack.hi == 0 { + gothrow("newproc1: newg missing stack") + } + + if readgstatus(newg) != _Gdead { + gothrow("newproc1: new g is not Gdead") + } + + sp := newg.stack.hi + sp -= 4 * regSize // extra space in case of reads slightly beyond frame + sp -= uintptr(siz) + memmove(unsafe.Pointer(sp), unsafe.Pointer(argp), uintptr(narg)) + if hasLinkRegister { + // caller's LR + sp -= ptrSize + *(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil + } + + memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched)) + newg.sched.sp = sp + newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function + newg.sched.g = newg + gostartcallfn(&newg.sched, fn) + newg.gopc = callerpc + casgstatus(newg, _Gdead, _Grunnable) + + if _p_.goidcache == _p_.goidcacheend { + // Sched.goidgen is the last allocated id, + // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch]. + // At startup sched.goidgen=0, so main goroutine receives goid=1. 
+ _p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch) + _p_.goidcache -= _GoidCacheBatch - 1 + _p_.goidcacheend = _p_.goidcache + _GoidCacheBatch + } + newg.goid = int64(_p_.goidcache) + _p_.goidcache++ + if raceenabled { + newg.racectx = racegostart(callerpc) + } + runqput(_p_, newg) + + if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic + wakep() + } + _g_.m.locks-- + if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack + _g_.stackguard0 = stackPreempt + } + return newg + } + + // Put on gfree list. + // If local list is too long, transfer a batch to the global list. + func gfput(_p_ *p, gp *g) { + if readgstatus(gp) != _Gdead { + gothrow("gfput: bad status (not Gdead)") + } + + stksize := gp.stack.hi - gp.stack.lo + + if stksize != _FixedStack { + // non-standard stack size - free it. + stackfree(gp.stack) + gp.stack.lo = 0 + gp.stack.hi = 0 + gp.stackguard0 = 0 + } + + gp.schedlink = _p_.gfree + _p_.gfree = gp + _p_.gfreecnt++ + if _p_.gfreecnt >= 64 { + lock(&sched.gflock) + for _p_.gfreecnt >= 32 { + _p_.gfreecnt-- + gp = _p_.gfree + _p_.gfree = gp.schedlink + gp.schedlink = sched.gfree + sched.gfree = gp + sched.ngfree++ + } + unlock(&sched.gflock) + } + } + + // Get from gfree list. + // If local list is empty, grab a batch from global list. + func gfget(_p_ *p) *g { + retry: + gp := _p_.gfree + if gp == nil && sched.gfree != nil { + lock(&sched.gflock) + for _p_.gfreecnt < 32 && sched.gfree != nil { + _p_.gfreecnt++ + gp = sched.gfree + sched.gfree = gp.schedlink + sched.ngfree-- + gp.schedlink = _p_.gfree + _p_.gfree = gp + } + unlock(&sched.gflock) + goto retry + } + if gp != nil { + _p_.gfree = gp.schedlink + _p_.gfreecnt-- + if gp.stack.lo == 0 { + // Stack was deallocated in gfput. Allocate a new one. + systemstack(func() { + gp.stack = stackalloc(_FixedStack) + }) + gp.stackguard0 = gp.stack.lo + _StackGuard + } else { + if raceenabled { + racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) + } + } + } + return gp + } + + // Purge all cached G's from gfree list to the global list. + func gfpurge(_p_ *p) { + lock(&sched.gflock) + for _p_.gfreecnt != 0 { + _p_.gfreecnt-- + gp := _p_.gfree + _p_.gfree = gp.schedlink + gp.schedlink = sched.gfree + sched.gfree = gp + sched.ngfree++ + } + unlock(&sched.gflock) + } + + // Breakpoint executes a breakpoint trap. + func Breakpoint() { + breakpoint() + } + + // dolockOSThread is called by LockOSThread and lockOSThread below + // after they modify m.locked. Do not allow preemption during this call, + // or else the m might be different in this function than in the caller. + //go:nosplit + func dolockOSThread() { + _g_ := getg() + _g_.m.lockedg = _g_ + _g_.lockedm = _g_.m + } + + //go:nosplit + + // LockOSThread wires the calling goroutine to its current operating system thread. + // Until the calling goroutine exits or calls UnlockOSThread, it will always + // execute in that thread, and no other goroutine can. + func LockOSThread() { + getg().m.locked |= _LockExternal + dolockOSThread() + } + + //go:nosplit + func lockOSThread() { + getg().m.locked += _LockInternal + dolockOSThread() + } + + // dounlockOSThread is called by UnlockOSThread and unlockOSThread below + // after they update m->locked. Do not allow preemption during this call, + // or else the m might be in different in this function than in the caller. 
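The goid cache refill above amortizes the shared counter: one atomic add reserves a whole batch of IDs that the P then hands out with no further synchronization. A standalone sketch with made-up names and a batch size of 16:

package main

import (
	"fmt"
	"sync/atomic"
)

const batch = 16

type idAllocator struct {
	gen uint64 // shared; last allocated id
}

type pcache struct {
	next, end uint64 // per-P, unsynchronized
}

func (c *pcache) nextID(a *idAllocator) uint64 {
	if c.next == c.end {
		// Reserve (gen, gen+batch] with a single atomic add;
		// the add returns the new high end of the batch.
		hi := atomic.AddUint64(&a.gen, batch)
		c.next = hi - batch + 1
		c.end = hi + 1
	}
	id := c.next
	c.next++
	return id
}

func main() {
	var a idAllocator
	var c pcache
	fmt.Println(c.nextID(&a), c.nextID(&a)) // 1 2, matching "main goroutine receives goid=1"
}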
+ //go:nosplit + func dounlockOSThread() { + _g_ := getg() + if _g_.m.locked != 0 { + return + } + _g_.m.lockedg = nil + _g_.lockedm = nil + } + + //go:nosplit + + // UnlockOSThread unwires the calling goroutine from its fixed operating system thread. + // If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op. + func UnlockOSThread() { + getg().m.locked &^= _LockExternal + dounlockOSThread() + } + + //go:nosplit + func unlockOSThread() { + _g_ := getg() + if _g_.m.locked < _LockInternal { + systemstack(badunlockosthread) + } + _g_.m.locked -= _LockInternal + dounlockOSThread() + } + + func badunlockosthread() { + gothrow("runtime: internal error: misuse of lockOSThread/unlockOSThread") + } + + func gcount() int32 { + n := int32(allglen) - sched.ngfree + for i := 0; ; i++ { + _p_ := allp[i] + if _p_ == nil { + break + } + n -= _p_.gfreecnt + } + + // All these variables can be changed concurrently, so the result can be inconsistent. + // But at least the current goroutine is running. + if n < 1 { + n = 1 + } + return n + } + + func mcount() int32 { + return sched.mcount + } + + var prof struct { + lock uint32 + hz int32 + } + + func _System() { _System() } + func _ExternalCode() { _ExternalCode() } + func _GC() { _GC() } + + var etext struct{} + + // Called if we receive a SIGPROF signal. + func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) { + var n int32 + var traceback bool + var stk [100]uintptr + + if prof.hz == 0 { + return + } + + // Profiling runs concurrently with GC, so it must not allocate. + mp.mallocing++ + + // Define that a "user g" is a user-created goroutine, and a "system g" + // is one that is m->g0 or m->gsignal. We've only made sure that we + // can unwind user g's, so exclude the system g's. + // + // It is not quite as easy as testing gp == m->curg (the current user g) + // because we might be interrupted for profiling halfway through a + // goroutine switch. The switch involves updating three (or four) values: + // g, PC, SP, and (on arm) LR. The PC must be the last to be updated, + // because once it gets updated the new g is running. + // + // When switching from a user g to a system g, LR is not considered live, + // so the update only affects g, SP, and PC. Since PC must be last, there + // the possible partial transitions in ordinary execution are (1) g alone is updated, + // (2) both g and SP are updated, and (3) SP alone is updated. + // If g is updated, we'll see a system g and not look closer. + // If SP alone is updated, we can detect the partial transition by checking + // whether the SP is within g's stack bounds. (We could also require that SP + // be changed only after g, but the stack bounds check is needed by other + // cases, so there is no need to impose an additional requirement.) + // + // There is one exceptional transition to a system g, not in ordinary execution. + // When a signal arrives, the operating system starts the signal handler running + // with an updated PC and SP. The g is updated last, at the beginning of the + // handler. There are two reasons this is okay. First, until g is updated the + // g and SP do not match, so the stack bounds check detects the partial transition. + // Second, signal handlers currently run with signals disabled, so a profiling + // signal cannot arrive during the handler. + // + // When switching from a system g to a user g, there are three possibilities. 
+ // + // First, it may be that the g switch has no PC update, because the SP + // either corresponds to a user g throughout (as in asmcgocall) + // or because it has been arranged to look like a user g frame + // (as in cgocallback_gofunc). In this case, since the entire + // transition is a g+SP update, a partial transition updating just one of + // those will be detected by the stack bounds check. + // + // Second, when returning from a signal handler, the PC and SP updates + // are performed by the operating system in an atomic update, so the g + // update must be done before them. The stack bounds check detects + // the partial transition here, and (again) signal handlers run with signals + // disabled, so a profiling signal cannot arrive then anyway. + // + // Third, the common case: it may be that the switch updates g, SP, and PC + // separately, as in gogo. + // + // Because gogo is the only instance, we check whether the PC lies + // within that function, and if so, not ask for a traceback. This approach + // requires knowing the size of the gogo function, which we + // record in arch_*.h and check in runtime_test.go. + // + // There is another apparently viable approach, recorded here in case + // the "PC within gogo" check turns out not to be usable. + // It would be possible to delay the update of either g or SP until immediately + // before the PC update instruction. Then, because of the stack bounds check, + // the only problematic interrupt point is just before that PC update instruction, + // and the sigprof handler can detect that instruction and simulate stepping past + // it in order to reach a consistent state. On ARM, the update of g must be made + // in two places (in R10 and also in a TLS slot), so the delayed update would + // need to be the SP update. The sigprof handler must read the instruction at + // the current PC and if it was the known instruction (for example, JMP BX or + // MOV R2, PC), use that other register in place of the PC value. + // The biggest drawback to this solution is that it requires that we can tell + // whether it's safe to read from the memory pointed at by PC. + // In a correct program, we can test PC == nil and otherwise read, + // but if a profiling signal happens at the instant that a program executes + // a bad jump (before the program manages to handle the resulting fault) + // the profiling handler could fault trying to read nonexistent memory. + // + // To recap, there are no constraints on the assembly being used for the + // transition. We simply require that g and SP match and that the PC is not + // in gogo. + traceback = true + usp := uintptr(unsafe.Pointer(sp)) + gogo := funcPC(gogo) + if gp == nil || gp != mp.curg || + usp < gp.stack.lo || gp.stack.hi < usp || + (gogo <= uintptr(unsafe.Pointer(pc)) && uintptr(unsafe.Pointer(pc)) < gogo+_RuntimeGogoBytes) { + traceback = false + } + + n = 0 + if traceback { + n = int32(gentraceback(uintptr(unsafe.Pointer(pc)), uintptr(unsafe.Pointer(sp)), uintptr(unsafe.Pointer(lr)), gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap)) + } + if !traceback || n <= 0 { + // Normal traceback is impossible or has failed. + // See if it falls into several common cases. + n = 0 + if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { + // Cgo, we can't unwind and symbolize arbitrary C code, + // so instead collect Go stack that leads to the cgo call. + // This is especially important on windows, since all syscalls are cgo calls. 
+ n = int32(gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)) + } + if GOOS == "windows" && n == 0 && mp.libcallg != nil && mp.libcallpc != 0 && mp.libcallsp != 0 { + // Libcall, i.e. runtime syscall on windows. + // Collect Go stack that leads to the call. + n = int32(gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0)) + } + if n == 0 { + // If all of the above has failed, account it against abstract "System" or "GC". + n = 2 + // "ExternalCode" is better than "etext". + if uintptr(unsafe.Pointer(pc)) > uintptr(unsafe.Pointer(&etext)) { + pc = (*uint8)(unsafe.Pointer(uintptr(funcPC(_ExternalCode) + _PCQuantum))) + } + stk[0] = uintptr(unsafe.Pointer(pc)) + if mp.gcing != 0 || mp.helpgc != 0 { + stk[1] = funcPC(_GC) + _PCQuantum + } else { + stk[1] = funcPC(_System) + _PCQuantum + } + } + } + + if prof.hz != 0 { + // Simple cas-lock to coordinate with setcpuprofilerate. + for !cas(&prof.lock, 0, 1) { + osyield() + } + if prof.hz != 0 { + cpuproftick(&stk[0], n) + } + atomicstore(&prof.lock, 0) + } + mp.mallocing-- + } + + // Arrange to call fn with a traceback hz times a second. + func setcpuprofilerate_m(hz int32) { + // Force sane arguments. + if hz < 0 { + hz = 0 + } + + // Disable preemption, otherwise we can be rescheduled to another thread + // that has profiling enabled. + _g_ := getg() + _g_.m.locks++ + + // Stop profiler on this thread so that it is safe to lock prof. + // if a profiling signal came in while we had prof locked, + // it would deadlock. + resetcpuprofiler(0) + + for !cas(&prof.lock, 0, 1) { + osyield() + } + prof.hz = hz + atomicstore(&prof.lock, 0) + + lock(&sched.lock) + sched.profilehz = hz + unlock(&sched.lock) + + if hz != 0 { + resetcpuprofiler(hz) + } + + _g_.m.locks-- + } + + // Change number of processors. The world is stopped, sched is locked. ++// gcworkbufs are not being modified by either the GC or ++// the write barrier code. + func procresize(new int32) { + old := gomaxprocs + if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs { + gothrow("procresize: invalid arg") + } + + // initialize new P's + for i := int32(0); i < new; i++ { + p := allp[i] + if p == nil { + p = newP() + p.id = i + p.status = _Pgcstop + atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(p)) + } + if p.mcache == nil { + if old == 0 && i == 0 { + if getg().m.mcache == nil { + gothrow("missing mcache?") + } + p.mcache = getg().m.mcache // bootstrap + } else { + p.mcache = allocmcache() + } + } + } + + // redistribute runnable G's evenly + // collect all runnable goroutines in global queue preserving FIFO order + // FIFO order is required to ensure fairness even during frequent GCs + // see http://golang.org/issue/7126 + empty := false + for !empty { + empty = true + for i := int32(0); i < old; i++ { + p := allp[i] + if p.runqhead == p.runqtail { + continue + } + empty = false + // pop from tail of local queue + p.runqtail-- + gp := p.runq[p.runqtail%uint32(len(p.runq))] + // push onto head of global queue + gp.schedlink = sched.runqhead + sched.runqhead = gp + if sched.runqtail == nil { + sched.runqtail = gp + } + sched.runqsize++ + } + } + + // fill local queues with at most len(p.runq)/2 goroutines + // start at 1 because current M already executes some G and will acquire allp[0] below, + // so if we have a spare G we want to put it into allp[1]. 
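The prof.lock dance above (in both sigprof and setcpuprofilerate_m) is a bare compare-and-swap lock that yields between attempts. A standalone sketch using only the standard library, with runtime.Gosched standing in for the runtime-internal osyield:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

type caslock struct{ state uint32 }

func (l *caslock) lock() {
	for !atomic.CompareAndSwapUint32(&l.state, 0, 1) {
		runtime.Gosched() // the runtime calls osyield() here
	}
}

func (l *caslock) unlock() { atomic.StoreUint32(&l.state, 0) }

func main() {
	var l caslock
	l.lock()
	fmt.Println("holding the profiler lock")
	l.unlock()
}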
+ var _p_ p + for i := int32(1); i < new*int32(len(_p_.runq))/2 && sched.runqsize > 0; i++ { + gp := sched.runqhead + sched.runqhead = gp.schedlink + if sched.runqhead == nil { + sched.runqtail = nil + } + sched.runqsize-- + runqput(allp[i%new], gp) + } + + // free unused P's + for i := new; i < old; i++ { + p := allp[i] + freemcache(p.mcache) + p.mcache = nil + gfpurge(p) + p.status = _Pdead + // can't free P itself because it can be referenced by an M in syscall + } + + _g_ := getg() + if _g_.m.p != nil { + _g_.m.p.m = nil + } + _g_.m.p = nil + _g_.m.mcache = nil + p := allp[0] + p.m = nil + p.status = _Pidle + acquirep(p) + for i := new - 1; i > 0; i-- { + p := allp[i] + p.status = _Pidle + pidleput(p) + } + var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 + atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(new)) + } + + // Associate p and the current m. + func acquirep(_p_ *p) { + _g_ := getg() + + if _g_.m.p != nil || _g_.m.mcache != nil { + gothrow("acquirep: already in go") + } + if _p_.m != nil || _p_.status != _Pidle { + id := int32(0) + if _p_.m != nil { + id = _p_.m.id + } + print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") + gothrow("acquirep: invalid p state") + } + _g_.m.mcache = _p_.mcache + _g_.m.p = _p_ + _p_.m = _g_.m + _p_.status = _Prunning + } + + // Disassociate p and the current m. + func releasep() *p { + _g_ := getg() + + if _g_.m.p == nil || _g_.m.mcache == nil { + gothrow("releasep: invalid arg") + } + _p_ := _g_.m.p + if _p_.m != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning { + print("releasep: m=", _g_.m, " m->p=", _g_.m.p, " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n") + gothrow("releasep: invalid p state") + } + _g_.m.p = nil + _g_.m.mcache = nil + _p_.m = nil + _p_.status = _Pidle + return _p_ + } + + func incidlelocked(v int32) { + lock(&sched.lock) + sched.nmidlelocked += v + if v > 0 { + checkdead() + } + unlock(&sched.lock) + } + + // Check for deadlock situation. + // The check is based on number of running M's, if 0 -> deadlock. + func checkdead() { + // If we are dying because of a signal caught on an already idle thread, + // freezetheworld will cause all running threads to block. + // And runtime will essentially enter into deadlock state, + // except that there is a thread that will call exit soon. + if panicking > 0 { + return + } + + // -1 for sysmon + run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1 + if run > 0 { + return + } + if run < 0 { + print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n") + gothrow("checkdead: inconsistent counts") + } + + grunning := 0 + lock(&allglock) + for i := 0; i < len(allgs); i++ { + gp := allgs[i] + if gp.issystem { + continue + } + s := readgstatus(gp) + switch s &^ _Gscan { + case _Gwaiting: + grunning++ + case _Grunnable, + _Grunning, + _Gsyscall: + unlock(&allglock) + print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n") + gothrow("checkdead: runnable g") + } + } + unlock(&allglock) + if grunning == 0 { // possible if main goroutine calls runtime·Goexit() + gothrow("no goroutines (main called runtime.Goexit) - deadlock!") + } + + // Maybe jump time forward for playground. 
+ gp := timejump() + if gp != nil { + casgstatus(gp, _Gwaiting, _Grunnable) + globrunqput(gp) + _p_ := pidleget() + if _p_ == nil { + gothrow("checkdead: no p for timer") + } + mp := mget() + if mp == nil { + _newm(nil, _p_) + } else { + mp.nextp = _p_ + notewakeup(&mp.park) + } + return + } + + getg().m.throwing = -1 // do not dump full stacks + gothrow("all goroutines are asleep - deadlock!") + } + + func sysmon() { + // If we go two minutes without a garbage collection, force one to run. + forcegcperiod := int64(2 * 60 * 1e9) + + // If a heap span goes unused for 5 minutes after a garbage collection, + // we hand it back to the operating system. + scavengelimit := int64(5 * 60 * 1e9) + + if debug.scavenge > 0 { + // Scavenge-a-lot for testing. + forcegcperiod = 10 * 1e6 + scavengelimit = 20 * 1e6 + } + + lastscavenge := nanotime() + nscavenge := 0 + + // Make wake-up period small enough for the sampling to be correct. + maxsleep := forcegcperiod / 2 + if scavengelimit < forcegcperiod { + maxsleep = scavengelimit / 2 + } + + lasttrace := int64(0) + idle := 0 // how many cycles in succession we had not wokeup somebody + delay := uint32(0) + for { + if idle == 0 { // start with 20us sleep... + delay = 20 + } else if idle > 50 { // start doubling the sleep after 1ms... + delay *= 2 + } + if delay > 10*1000 { // up to 10ms + delay = 10 * 1000 + } + usleep(delay) + if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic + lock(&sched.lock) + if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) { + atomicstore(&sched.sysmonwait, 1) + unlock(&sched.lock) + notetsleep(&sched.sysmonnote, maxsleep) + lock(&sched.lock) + atomicstore(&sched.sysmonwait, 0) + noteclear(&sched.sysmonnote) + idle = 0 + delay = 20 + } + unlock(&sched.lock) + } + // poll network if not polled for more than 10ms + lastpoll := int64(atomicload64(&sched.lastpoll)) + now := nanotime() + unixnow := unixnanotime() + if lastpoll != 0 && lastpoll+10*1000*1000 < now { + cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) + gp := netpoll(false) // non-blocking - returns list of goroutines + if gp != nil { + // Need to decrement number of idle locked M's + // (pretending that one more is running) before injectglist. + // Otherwise it can lead to the following situation: + // injectglist grabs all P's but before it starts M's to run the P's, + // another M returns from syscall, finishes running its G, + // observes that there is no work to do and no other running M's + // and reports deadlock. 
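sysmon's polling loop above adapts its sleep: 20µs while it keeps finding work, doubling once it has been idle for more than 50 consecutive cycles, and never more than 10ms. A standalone sketch of that backoff:

package main

import "fmt"

// nextDelay returns the next sleep in microseconds, given the previous delay
// and how many cycles in a row sysmon found nothing to do.
func nextDelay(delay uint32, idle int) uint32 {
	if idle == 0 {
		delay = 20
	} else if idle > 50 {
		delay *= 2
	}
	if delay > 10*1000 {
		delay = 10 * 1000
	}
	return delay
}

func main() {
	d := uint32(0)
	for idle := 0; idle <= 60; idle++ {
		d = nextDelay(d, idle)
	}
	fmt.Println(d, "µs") // 10000 µs: capped after roughly ten doublings
}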
+ incidlelocked(-1) + injectglist(gp) + incidlelocked(1) + } + } + // retake P's blocked in syscalls + // and preempt long running G's + if retake(now) != 0 { + idle = 0 + } else { + idle++ + } + // check if we need to force a GC + lastgc := int64(atomicload64(&memstats.last_gc)) + if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 { + lock(&forcegc.lock) + forcegc.idle = 0 + forcegc.g.schedlink = nil + injectglist(forcegc.g) + unlock(&forcegc.lock) + } + // scavenge heap once in a while + if lastscavenge+scavengelimit/2 < now { + mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit)) + lastscavenge = now + nscavenge++ + } + if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now { + lasttrace = now + schedtrace(debug.scheddetail > 0) + } + } + } + + var pdesc [_MaxGomaxprocs]struct { + schedtick uint32 + schedwhen int64 + syscalltick uint32 + syscallwhen int64 + } + + func retake(now int64) uint32 { + n := 0 + for i := int32(0); i < gomaxprocs; i++ { + _p_ := allp[i] + if _p_ == nil { + continue + } + pd := &pdesc[i] + s := _p_.status + if s == _Psyscall { + // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). + t := int64(_p_.syscalltick) + if int64(pd.syscalltick) != t { + pd.syscalltick = uint32(t) + pd.syscallwhen = now + continue + } + // On the one hand we don't want to retake Ps if there is no other work to do, + // but on the other hand we want to retake them eventually + // because they can prevent the sysmon thread from deep sleep. + if _p_.runqhead == _p_.runqtail && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now { + continue + } + // Need to decrement number of idle locked M's + // (pretending that one more is running) before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + incidlelocked(-1) + if cas(&_p_.status, s, _Pidle) { + n++ + handoffp(_p_) + } + incidlelocked(1) + } else if s == _Prunning { + // Preempt G if it's running for more than 10ms. + t := int64(_p_.schedtick) + if int64(pd.schedtick) != t { + pd.schedtick = uint32(t) + pd.schedwhen = now + continue + } + if pd.schedwhen+10*1000*1000 > now { + continue + } + preemptone(_p_) + } + } + return uint32(n) + } + + // Tell all goroutines that they have been preempted and they should stop. + // This function is purely best-effort. It can fail to inform a goroutine if a + // processor just started running it. + // No locks need to be held. + // Returns true if preemption request was issued to at least one goroutine. + func preemptall() bool { + res := false + for i := int32(0); i < gomaxprocs; i++ { + _p_ := allp[i] + if _p_ == nil || _p_.status != _Prunning { + continue + } + if preemptone(_p_) { + res = true + } + } + return res + } + + // Tell the goroutine running on processor P to stop. + // This function is purely best-effort. It can incorrectly fail to inform the + // goroutine. It can send inform the wrong goroutine. Even if it informs the + // correct goroutine, that goroutine might ignore the request if it is + // simultaneously executing newstack. + // No lock needs to be held. + // Returns true if preemption request was issued. 
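retake above detects a stuck P by snapshotting its schedtick together with a timestamp: if the tick has not advanced in 10ms, the same G has been running the whole time and gets a preemption request. A toy version of that bookkeeping:

package main

import "fmt"

type pdesc struct {
	schedtick uint32
	schedwhen int64 // nanoseconds
}

// shouldPreempt updates pd and reports whether the P's current G has been
// running for more than 10ms without a scheduler pass.
func shouldPreempt(pd *pdesc, tick uint32, now int64) bool {
	if pd.schedtick != tick {
		pd.schedtick = tick
		pd.schedwhen = now
		return false
	}
	return pd.schedwhen+10*1000*1000 <= now
}

func main() {
	var pd pdesc
	fmt.Println(shouldPreempt(&pd, 7, 0))            // false: first observation
	fmt.Println(shouldPreempt(&pd, 7, 5*1000*1000))  // false: only 5ms so far
	fmt.Println(shouldPreempt(&pd, 7, 12*1000*1000)) // true: same tick for 12ms
}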
+ // The actual preemption will happen at some point in the future + // and will be indicated by the gp->status no longer being + // Grunning + func preemptone(_p_ *p) bool { + mp := _p_.m + if mp == nil || mp == getg().m { + return false + } + gp := mp.curg + if gp == nil || gp == mp.g0 { + return false + } + + gp.preempt = true + + // Every call in a go routine checks for stack overflow by + // comparing the current stack pointer to gp->stackguard0. + // Setting gp->stackguard0 to StackPreempt folds + // preemption into the normal stack overflow check. + gp.stackguard0 = stackPreempt + return true + } + + var starttime int64 + + func schedtrace(detailed bool) { + now := nanotime() + if starttime == 0 { + starttime = now + } + + lock(&sched.lock) + print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) + if detailed { + print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n") + } + // We must be careful while reading data from P's, M's and G's. + // Even if we hold schedlock, most data can be changed concurrently. + // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. + for i := int32(0); i < gomaxprocs; i++ { + _p_ := allp[i] + if _p_ == nil { + continue + } + mp := _p_.m + h := atomicload(&_p_.runqhead) + t := atomicload(&_p_.runqtail) + if detailed { + id := int32(-1) + if mp != nil { + id = mp.id + } + print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n") + } else { + // In non-detailed mode format lengths of per-P run queues as: + // [len1 len2 len3 len4] + print(" ") + if i == 0 { + print("[") + } + print(t - h) + if i == gomaxprocs-1 { + print("]\n") + } + } + } + + if !detailed { + unlock(&sched.lock) + return + } + + for mp := allm; mp != nil; mp = mp.alllink { + _p_ := mp.p + gp := mp.curg + lockedg := mp.lockedg + id1 := int32(-1) + if _p_ != nil { + id1 = _p_.id + } + id2 := int64(-1) + if gp != nil { + id2 = gp.goid + } + id3 := int64(-1) + if lockedg != nil { + id3 = lockedg.goid + } + print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " gcing=", mp.gcing, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n") + } + + lock(&allglock) + for gi := 0; gi < len(allgs); gi++ { + gp := allgs[gi] + mp := gp.m + lockedm := gp.lockedm + id1 := int32(-1) + if mp != nil { + id1 = mp.id + } + id2 := int32(-1) + if lockedm != nil { + id2 = lockedm.id + } + print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n") + } + unlock(&allglock) + unlock(&sched.lock) + } + + // Put mp on midle list. + // Sched must be locked. + func mput(mp *m) { + mp.schedlink = sched.midle + sched.midle = mp + sched.nmidle++ + checkdead() + } + + // Try to get an m from midle list. + // Sched must be locked. + func mget() *m { + mp := sched.midle + if mp != nil { + sched.midle = mp.schedlink + sched.nmidle-- + } + return mp + } + + // Put gp on the global runnable queue. + // Sched must be locked. 
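mput and mget above keep idle M's on an intrusive LIFO list threaded through each M's own schedlink field, so pushing and popping is a couple of pointer moves under sched.lock. A standalone sketch with toy types:

package main

import "fmt"

type mlike struct {
	id        int
	schedlink *mlike
}

type schedlike struct {
	midle  *mlike
	nmidle int32
}

func mput(s *schedlike, mp *mlike) {
	mp.schedlink = s.midle
	s.midle = mp
	s.nmidle++
}

func mget(s *schedlike) *mlike {
	mp := s.midle
	if mp != nil {
		s.midle = mp.schedlink
		s.nmidle--
	}
	return mp
}

func main() {
	var s schedlike
	mput(&s, &mlike{id: 1})
	mput(&s, &mlike{id: 2})
	fmt.Println(mget(&s).id, s.nmidle) // 2 1: LIFO order
}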
+ func globrunqput(gp *g) { + gp.schedlink = nil + if sched.runqtail != nil { + sched.runqtail.schedlink = gp + } else { + sched.runqhead = gp + } + sched.runqtail = gp + sched.runqsize++ + } + + // Put a batch of runnable goroutines on the global runnable queue. + // Sched must be locked. + func globrunqputbatch(ghead *g, gtail *g, n int32) { + gtail.schedlink = nil + if sched.runqtail != nil { + sched.runqtail.schedlink = ghead + } else { + sched.runqhead = ghead + } + sched.runqtail = gtail + sched.runqsize += n + } + + // Try get a batch of G's from the global runnable queue. + // Sched must be locked. + func globrunqget(_p_ *p, max int32) *g { + if sched.runqsize == 0 { + return nil + } + + n := sched.runqsize/gomaxprocs + 1 + if n > sched.runqsize { + n = sched.runqsize + } + if max > 0 && n > max { + n = max + } + if n > int32(len(_p_.runq))/2 { + n = int32(len(_p_.runq)) / 2 + } + + sched.runqsize -= n + if sched.runqsize == 0 { + sched.runqtail = nil + } + + gp := sched.runqhead + sched.runqhead = gp.schedlink + n-- + for ; n > 0; n-- { + gp1 := sched.runqhead + sched.runqhead = gp1.schedlink + runqput(_p_, gp1) + } + return gp + } + + // Put p to on _Pidle list. + // Sched must be locked. + func pidleput(_p_ *p) { + _p_.link = sched.pidle + sched.pidle = _p_ + xadd(&sched.npidle, 1) // TODO: fast atomic + } + + // Try get a p from _Pidle list. + // Sched must be locked. + func pidleget() *p { + _p_ := sched.pidle + if _p_ != nil { + sched.pidle = _p_.link + xadd(&sched.npidle, -1) // TODO: fast atomic + } + return _p_ + } + + // Try to put g on local runnable queue. + // If it's full, put onto global queue. + // Executed only by the owner P. + func runqput(_p_ *p, gp *g) { + retry: + h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers + t := _p_.runqtail + if t-h < uint32(len(_p_.runq)) { + _p_.runq[t%uint32(len(_p_.runq))] = gp + atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption + return + } + if runqputslow(_p_, gp, h, t) { + return + } + // the queue is not full, now the put above must suceed + goto retry + } + + // Put g and a batch of work from local runnable queue on global queue. + // Executed only by the owner P. + func runqputslow(_p_ *p, gp *g, h, t uint32) bool { + var batch [len(_p_.runq)/2 + 1]*g + + // First, grab a batch from local queue. + n := t - h + n = n / 2 + if n != uint32(len(_p_.runq)/2) { + gothrow("runqputslow: queue is not full") + } + for i := uint32(0); i < n; i++ { + batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))] + } + if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + return false + } + batch[n] = gp + + // Link the goroutines. + for i := uint32(0); i < n; i++ { + batch[i].schedlink = batch[i+1] + } + + // Now put the batch on global queue. + lock(&sched.lock) + globrunqputbatch(batch[0], batch[n], int32(n+1)) + unlock(&sched.lock) + return true + } + + // Get g from local runnable queue. + // Executed only by the owner P. + func runqget(_p_ *p) *g { + for { + h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := _p_.runqtail + if t == h { + return nil + } + gp := _p_.runq[h%uint32(len(_p_.runq))] + if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume + return gp + } + } + } + + // Grabs a batch of goroutines from local runnable queue. + // batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines. + // Can be executed by any P. 
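The local run queue above is the heart of the scheduler's fast path: the owning P publishes work with an atomic store of the tail, any P may consume by CAS-ing the head forward, and overflow spills half the queue to the locked global list. A toy, self-contained version (ints instead of *g, and a mutex-protected slice standing in for the global queue):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type runq struct {
	head, tail uint32
	buf        [8]int // work items; the runtime stores *g here

	globalMu sync.Mutex
	global   []int
}

// put is called only by the owner, mirroring runqput/runqputslow.
func (q *runq) put(v int) {
	for {
		h := atomic.LoadUint32(&q.head) // load-acquire, synchronize with consumers
		t := q.tail
		if t-h < uint32(len(q.buf)) {
			q.buf[t%uint32(len(q.buf))] = v
			atomic.StoreUint32(&q.tail, t+1) // store-release, publishes the item
			return
		}
		// Full: move half of the items (plus v) to the global queue.
		n := (t - h) / 2
		batch := make([]int, 0, n+1)
		for i := uint32(0); i < n; i++ {
			batch = append(batch, q.buf[(h+i)%uint32(len(q.buf))])
		}
		if !atomic.CompareAndSwapUint32(&q.head, h, h+n) {
			continue // a consumer advanced head first; retry the fast path
		}
		batch = append(batch, v)
		q.globalMu.Lock()
		q.global = append(q.global, batch...)
		q.globalMu.Unlock()
		return
	}
}

// get mirrors runqget: any consumer may pop by advancing head with a CAS.
func (q *runq) get() (int, bool) {
	for {
		h := atomic.LoadUint32(&q.head)
		t := atomic.LoadUint32(&q.tail)
		if t == h {
			return 0, false
		}
		v := q.buf[h%uint32(len(q.buf))]
		if atomic.CompareAndSwapUint32(&q.head, h, h+1) {
			return v, true
		}
	}
}

func main() {
	var q runq
	for i := 1; i <= 10; i++ {
		q.put(i)
	}
	v, _ := q.get()
	fmt.Println(v, len(q.global)) // 5 5: items 1-4 and 9 spilled to global, 5 is next locally
}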
+ func runqgrab(_p_ *p, batch []*g) uint32 { + for { + h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer + n := t - h + n = n - n/2 + if n == 0 { + return 0 + } + if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t + continue + } + for i := uint32(0); i < n; i++ { + batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))] + } + if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + return n + } + } + } + + // Steal half of elements from local runnable queue of p2 + // and put onto local runnable queue of p. + // Returns one of the stolen elements (or nil if failed). + func runqsteal(_p_, p2 *p) *g { + var batch [len(_p_.runq) / 2]*g + + n := runqgrab(p2, batch[:]) + if n == 0 { + return nil + } + n-- + gp := batch[n] + if n == 0 { + return gp + } + h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers + t := _p_.runqtail + if t-h+n >= uint32(len(_p_.runq)) { + gothrow("runqsteal: runq overflow") + } + for i := uint32(0); i < n; i++ { + _p_.runq[(t+i)%uint32(len(_p_.runq))] = batch[i] + } + atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + return gp + } + + func testSchedLocalQueue() { + _p_ := new(p) + gs := make([]g, len(_p_.runq)) + for i := 0; i < len(_p_.runq); i++ { + if runqget(_p_) != nil { + gothrow("runq is not empty initially") + } + for j := 0; j < i; j++ { + runqput(_p_, &gs[i]) + } + for j := 0; j < i; j++ { + if runqget(_p_) != &gs[i] { + print("bad element at iter ", i, "/", j, "\n") + gothrow("bad element") + } + } + if runqget(_p_) != nil { + gothrow("runq is not empty afterwards") + } + } + } + + func testSchedLocalQueueSteal() { + p1 := new(p) + p2 := new(p) + gs := make([]g, len(p1.runq)) + for i := 0; i < len(p1.runq); i++ { + for j := 0; j < i; j++ { + gs[j].sig = 0 + runqput(p1, &gs[j]) + } + gp := runqsteal(p2, p1) + s := 0 + if gp != nil { + s++ + gp.sig++ + } + for { + gp = runqget(p2) + if gp == nil { + break + } + s++ + gp.sig++ + } + for { + gp = runqget(p1) + if gp == nil { + break + } + gp.sig++ + } + for j := 0; j < i; j++ { + if gs[j].sig != 1 { + print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n") + gothrow("bad element") + } + } + if s != i/2 && s != i/2+1 { + print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n") + gothrow("bad steal") + } + } + } + + func setMaxThreads(in int) (out int) { + lock(&sched.lock) + out = int(sched.maxmcount) + sched.maxmcount = int32(in) + checkmcount() + unlock(&sched.lock) + return + } + + var goexperiment string = "GOEXPERIMENT" // TODO: defined in zaexperiment.h + + func haveexperiment(name string) bool { + x := goexperiment + for x != "" { + xname := "" + i := index(x, ",") + if i < 0 { + xname, x = x, "" + } else { + xname, x = x[:i], x[i+1:] + } + if xname == name { + return true + } + } + return false + } + + //go:nosplit + func sync_procPin() int { + _g_ := getg() + mp := _g_.m + + mp.locks++ + return int(mp.p.id) + } + + //go:nosplit + func sync_procUnpin() { + _g_ := getg() + _g_.m.locks-- + } diff --cc src/runtime/rt0_linux_386.s index 352e594d53,352e594d53..47fd908e78 --- a/src/runtime/rt0_linux_386.s +++ b/src/runtime/rt0_linux_386.s @@@ -9,7 -9,7 +9,6 @@@ TEXT _rt0_386_linux(SB),NOSPLIT,$ LEAL 12(SP), BX MOVL AX, 0(SP) MOVL BX, 4(SP) -- CALL runtime·linux_setup_vdso(SB) CALL main(SB) INT $3 diff --cc src/runtime/runtime2.go index 0000000000,c999b3072d..7625a2dd81 mode 000000,100644..100644 --- 
a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@@ -1,0 -1,608 +1,613 @@@ + // Copyright 2009 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + /* + * defined constants + */ + const ( + // G status + // + // If you add to this list, add to the list + // of "okay during garbage collection" status + // in mgc0.c too. + _Gidle = iota // 0 + _Grunnable // 1 runnable and on a run queue + _Grunning // 2 + _Gsyscall // 3 + _Gwaiting // 4 + _Gmoribund_unused // 5 currently unused, but hardcoded in gdb scripts + _Gdead // 6 + _Genqueue // 7 Only the Gscanenqueue is used. + _Gcopystack // 8 in this state when newstack is moving the stack + // the following encode that the GC is scanning the stack and what to do when it is done + _Gscan = 0x1000 // atomicstatus&~Gscan = the non-scan state, + // _Gscanidle = _Gscan + _Gidle, // Not used. Gidle only used with newly malloced gs + _Gscanrunnable = _Gscan + _Grunnable // 0x1001 When scanning complets make Grunnable (it is already on run queue) + _Gscanrunning = _Gscan + _Grunning // 0x1002 Used to tell preemption newstack routine to scan preempted stack. + _Gscansyscall = _Gscan + _Gsyscall // 0x1003 When scanning completes make is Gsyscall + _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 When scanning completes make it Gwaiting + // _Gscanmoribund_unused, // not possible + // _Gscandead, // not possible + _Gscanenqueue = _Gscan + _Genqueue // When scanning completes make it Grunnable and put on runqueue + ) + + const ( + // P status + _Pidle = iota + _Prunning + _Psyscall + _Pgcstop + _Pdead + ) + + // XXX inserting below here + + type mutex struct { + // Futex-based impl treats it as uint32 key, + // while sema-based impl as M* waitm. + // Used to be a union, but unions break precise GC. + key uintptr + } + + type note struct { + // Futex-based impl treats it as uint32 key, + // while sema-based impl as M* waitm. + // Used to be a union, but unions break precise GC. + key uintptr + } + + type _string struct { + str *byte + len int + } + + type funcval struct { + fn uintptr + // variable-size, fn-specific data here + } + + type iface struct { + tab *itab + data unsafe.Pointer + } + + type eface struct { + _type *_type + data unsafe.Pointer + } + + type slice struct { + array *byte // actual data + len uint // number of elements + cap uint // allocated number of elements + } + + type gobuf struct { + // The offsets of sp, pc, and g are known to (hard-coded in) libmach. + sp uintptr + pc uintptr + g *g + ctxt unsafe.Pointer // this has to be a pointer so that gc scans it + ret uintreg + lr uintptr + } + + // Known to compiler. + // Changes here must also be made in src/cmd/gc/select.c's selecttype. + type sudog struct { + g *g + selectdone *uint32 + next *sudog + prev *sudog + elem unsafe.Pointer // data element + releasetime int64 + nrelease int32 // -1 for acquire + waitlink *sudog // g.waiting list + } + + type gcstats struct { + // the struct must consist of only uint64's, + // because it is casted to uint64[]. 
+ nhandoff uint64 + nhandoffcnt uint64 + nprocyield uint64 + nosyield uint64 + nsleep uint64 + } + + type libcall struct { + fn uintptr + n uintptr // number of parameters + args uintptr // parameters + r1 uintptr // return values + r2 uintptr + err uintptr // error number + } + + // describes how to handle callback + type wincallbackcontext struct { + gobody unsafe.Pointer // go function to call + argsize uintptr // callback arguments size (in bytes) + restorestack uintptr // adjust stack on return by (in bytes) (386 only) + cleanstack bool + } + + // Stack describes a Go execution stack. + // The bounds of the stack are exactly [lo, hi), + // with no implicit data structures on either side. + type stack struct { + lo uintptr + hi uintptr + } + + type g struct { + // Stack parameters. + // stack describes the actual stack memory: [stack.lo, stack.hi). + // stackguard0 is the stack pointer compared in the Go stack growth prologue. + // It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption. + // stackguard1 is the stack pointer compared in the C stack growth prologue. + // It is stack.lo+StackGuard on g0 and gsignal stacks. + // It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash). + stack stack // offset known to runtime/cgo + stackguard0 uintptr // offset known to liblink + stackguard1 uintptr // offset known to liblink + + _panic *_panic // innermost panic - offset known to liblink + _defer *_defer // innermost defer + sched gobuf + syscallsp uintptr // if status==gsyscall, syscallsp = sched.sp to use during gc + syscallpc uintptr // if status==gsyscall, syscallpc = sched.pc to use during gc + param unsafe.Pointer // passed parameter on wakeup + atomicstatus uint32 + goid int64 + waitsince int64 // approx time when the g become blocked + waitreason string // if status==gwaiting + schedlink *g + issystem bool // do not output in stack dump, ignore in deadlock detector + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + paniconfault bool // panic (instead of crash) on unexpected fault address + preemptscan bool // preempted g does scan for gc + gcworkdone bool // debug: cleared at begining of gc work phase cycle, set by gcphasework, tested at end of cycle + throwsplit bool // must not split stack + raceignore int8 // ignore race detection events + m *m // for debuggers, but offset not hard-coded + lockedm *m + sig uint32 + writebuf []byte + sigcode0 uintptr + sigcode1 uintptr + sigpc uintptr + gopc uintptr // pc of go statement that created this goroutine + racectx uintptr + waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr) + end [0]byte + } + + type mts struct { + tv_sec int64 + tv_nsec int64 + } + + type mscratch struct { + v [6]uintptr + } + + type m struct { + g0 *g // goroutine with scheduling stack + morebuf gobuf // gobuf arg to morestack + + // Fields not known to debuggers. 
+ procid uint64 // for debuggers, but offset not hard-coded + gsignal *g // signal-handling g + tls [4]uintptr // thread-local storage (for x86 extern register) + mstartfn unsafe.Pointer // todo go func() + curg *g // current running goroutine + caughtsig *g // goroutine running during fatal signal + p *p // attached p for executing go code (nil if not executing go code) + nextp *p + id int32 + mallocing int32 + throwing int32 + gcing int32 + locks int32 + softfloat int32 + dying int32 + profilehz int32 + helpgc int32 + spinning bool // m is out of work and is actively looking for work + blocked bool // m is blocked on a note ++ inwb bool // m is executing a write barrier ++ printlock int8 + fastrand uint32 + ncgocall uint64 // number of cgo calls in total + ncgo int32 // number of cgo calls currently in progress + cgomal *cgomal + park note + alllink *m // on allm + schedlink *m + machport uint32 // return address for mach ipc (os x) + mcache *mcache + lockedg *g + createstack [32]uintptr // stack that created this thread. + freglo [16]uint32 // d[i] lsb and f[i] + freghi [16]uint32 // d[i] msb and f[i+16] + fflag uint32 // floating point compare flags + locked uint32 // tracking for lockosthread + nextwaitm *m // next m waiting for lock + waitsema uintptr // semaphore for parking on locks + waitsemacount uint32 + waitsemalock uint32 + gcstats gcstats + needextram bool + traceback uint8 + waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool + waitlock unsafe.Pointer + //#ifdef GOOS_windows + thread uintptr // thread handle + // these are here because they are too large to be on the stack + // of low-level NOSPLIT functions. + libcall libcall + libcallpc uintptr // for cpu profiler + libcallsp uintptr + libcallg *g + //#endif + //#ifdef GOOS_solaris + perrno *int32 // pointer to tls errno + // these are here because they are too large to be on the stack + // of low-level NOSPLIT functions. + //LibCall libcall; + ts mts + scratch mscratch + //#endif + //#ifdef GOOS_plan9 + notesig *int8 + errstr *byte + //#endif + end [0]byte + } + + type p struct { + lock mutex + + id int32 + status uint32 // one of pidle/prunning/... + link *p + schedtick uint32 // incremented on every scheduler call + syscalltick uint32 // incremented on every system call + m *m // back-link to associated m (nil if idle) + mcache *mcache + deferpool [5]*_defer // pool of available defer structs of different sizes (see panic.c) + + // Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen. + goidcache uint64 + goidcacheend uint64 + + // Queue of runnable goroutines. + runqhead uint32 + runqtail uint32 + runq [256]*g + + // Available G's (status == Gdead) + gfree *g + gfreecnt int32 + + pad [64]byte + } + + const ( + // The max value of GOMAXPROCS. + // There are no fundamental restrictions on the value. + _MaxGomaxprocs = 1 << 8 + ) + + type schedt struct { + lock mutex + + goidgen uint64 + + midle *m // idle m's waiting for work + nmidle int32 // number of idle m's waiting for work + nmidlelocked int32 // number of locked m's waiting for work + mcount int32 // number of m's that have been created + maxmcount int32 // maximum number of m's allowed (or die) + + pidle *p // idle p's + npidle uint32 + nmspinning uint32 + + // Global runnable queue. + runqhead *g + runqtail *g + runqsize int32 + + // Global cache of dead G's. 
+ gflock mutex + gfree *g + ngfree int32 + + gcwaiting uint32 // gc is waiting to run + stopwait int32 + stopnote note + sysmonwait uint32 + sysmonnote note + lastpoll uint64 + + profilehz int32 // cpu profiling rate + } + + // The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread. + // The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active. + // External locks are not recursive; a second lock is silently ignored. + // The upper bits of m->lockedcount record the nesting depth of calls to lockOSThread + // (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal). + // Internal locks can be recursive. For instance, a lock for cgo can occur while the main + // goroutine is holding the lock during the initialization phase. + const ( + _LockExternal = 1 + _LockInternal = 2 + ) + + type sigtabtt struct { + flags int32 + name *int8 + } + + const ( + _SigNotify = 1 << 0 // let signal.Notify have signal, even if from kernel + _SigKill = 1 << 1 // if signal.Notify doesn't take it, exit quietly + _SigThrow = 1 << 2 // if signal.Notify doesn't take it, exit loudly + _SigPanic = 1 << 3 // if the signal is from the kernel, panic + _SigDefault = 1 << 4 // if the signal isn't explicitly requested, don't monitor it + _SigHandling = 1 << 5 // our signal handler is registered + _SigIgnored = 1 << 6 // the signal was ignored before we registered for it + _SigGoExit = 1 << 7 // cause all runtime procs to exit (only used on Plan 9). + ) + + // Layout of in-memory per-function information prepared by linker + // See http://golang.org/s/go12symtab. + // Keep in sync with linker and with ../../libmach/sym.c + // and with package debug/gosym and with symtab.go in package runtime. + type _func struct { + entry uintptr // start pc + nameoff int32 // function name + + args int32 // in/out args size + frame int32 // legacy frame size; use pcsp if possible + + pcsp int32 + pcfile int32 + pcln int32 + npcdata int32 + nfuncdata int32 + } + + // layout of Itab known to compilers + // allocated in non-garbage-collected memory + type itab struct { + inter *interfacetype + _type *_type + link *itab + bad int32 + unused int32 + fun [0]uintptr + } + + const ( + // TODO: Generate in cmd/dist. + _NaCl = 0 + _Windows = 0 + _Solaris = 0 + _Plan9 = 0 + ) + + // Lock-free stack node. ++// // Also known to export_test.go. + type lfnode struct { - next *lfnode ++ next uint64 + pushcnt uintptr + } + + // Parallel for descriptor. + type parfor struct { + body unsafe.Pointer // go func(*parfor, uint32), executed for each element + done uint32 // number of idle threads + nthr uint32 // total number of threads + nthrmax uint32 // maximum number of threads + thrseq uint32 // thread id sequencer + cnt uint32 // iteration space [0, cnt) + ctx unsafe.Pointer // arbitrary user context + wait bool // if true, wait while all threads finish processing, + // otherwise parfor may return while other threads are still working + thr *parforthread // array of thread descriptors + pad uint32 // to align parforthread.pos for 64-bit atomic operations + // stats + nsteal uint64 + nstealcnt uint64 + nprocyield uint64 + nosyield uint64 + nsleep uint64 + } + + // Track memory allocated by code not written in Go during a cgo call, + // so that the garbage collector can see them. + type cgomal struct { + next *cgomal + alloc unsafe.Pointer + } + + // Holds variables parsed from GODEBUG env var. 
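
The lfnode change above (next *lfnode -> next uint64) is what lets the lock-free stack coexist with a collector written in Go: the link word is no longer a pointer the GC would try to trace, and head/next instead hold a packed (node address, push count) pair whose counter guards against ABA reuse. The sketch below only illustrates that packing idea; it assumes 48-bit user addresses with a 16-bit counter, and pack/unpack are made-up names (the runtime's real packing helpers are per-architecture).

    package main

    import (
    	"fmt"
    	"unsafe"
    )

    // node mirrors the shape of lfnode: the link is a plain uint64,
    // not a Go pointer, so a precise GC never traces it.
    type node struct {
    	next    uint64
    	pushcnt uintptr
    }

    // pack squeezes a node address and a push counter into one word.
    // Assumes user-space addresses fit in 48 bits (typical on amd64);
    // the counter keeps reused nodes from being confused (ABA).
    func pack(n *node, cnt uintptr) uint64 {
    	return uint64(uintptr(unsafe.Pointer(n)))<<16 | uint64(cnt&(1<<16-1))
    }

    // unpack recovers the node address and counter. Turning an integer
    // back into a pointer like this is only safe here because the node
    // stays reachable through the caller; it is an illustration, not a
    // general-purpose technique.
    func unpack(v uint64) (*node, uintptr) {
    	return (*node)(unsafe.Pointer(uintptr(v >> 16))), uintptr(v & (1<<16 - 1))
    }

    func main() {
    	n := &node{pushcnt: 7}
    	v := pack(n, n.pushcnt)
    	m, cnt := unpack(v)
    	fmt.Println(m == n, cnt) // true 7
    }
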
+ type debugvars struct { + allocfreetrace int32 + efence int32 + gctrace int32 + gcdead int32 + scheddetail int32 + schedtrace int32 + scavenge int32 + } + + // Indicates to write barrier and sychronization task to preform. + const ( - _GCoff = iota // stop and start nop - _GCquiesce // stop and start nop - _GCstw // stop the ps nop - _GCmark // scan the stacks and start no white to black - _GCsweep // stop and start nop ++ _GCoff = iota // GC not running, write barrier disabled ++ _GCquiesce // unused state ++ _GCstw // unused state ++ _GCscan // GC collecting roots into workbufs, write barrier disabled ++ _GCmark // GC marking from workbufs, write barrier ENABLED ++ _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED ++ _GCsweep // GC mark completed; sweeping in background, write barrier disabled + ) + + type forcegcstate struct { + lock mutex + g *g + idle uint32 + } + + var gcphase uint32 + + /* + * known to compiler + */ + const ( + _Structrnd = regSize + ) + + var startup_random_data *byte + var startup_random_data_len uint32 + + var invalidptr int32 + + const ( + // hashinit wants this many random bytes + _HashRandomBytes = 32 + ) + + /* + * deferred subroutine calls + */ + type _defer struct { + siz int32 + started bool + argp uintptr // where args were copied from + pc uintptr + fn *funcval + _panic *_panic // panic that is running defer + link *_defer + } + + /* + * panics + */ + type _panic struct { + argp unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink + arg interface{} // argument to panic + link *_panic // link to earlier panic + recovered bool // whether this panic is over + aborted bool // the panic was aborted + } + + /* + * stack traces + */ + + type stkframe struct { + fn *_func // function being run + pc uintptr // program counter within fn + continpc uintptr // program counter where execution can continue, or 0 if not + lr uintptr // program counter at caller aka link register + sp uintptr // stack pointer at pc + fp uintptr // stack pointer at caller aka frame pointer + varp uintptr // top of local variables + argp uintptr // pointer to function arguments + arglen uintptr // number of bytes at argp + argmap *bitvector // force use of this argmap + } + + const ( + _TraceRuntimeFrames = 1 << 0 // include frames for internal runtime functions. + _TraceTrap = 1 << 1 // the initial PC, SP are from a trap, not a return PC from a call + ) + + const ( + // The maximum number of frames we print for a traceback + _TracebackMaxFrames = 100 + ) + + var ( + emptystring string + allg **g + allglen uintptr + lastg *g + allm *m + allp [_MaxGomaxprocs + 1]*p + gomaxprocs int32 + needextram uint32 + panicking uint32 + goos *int8 + ncpu int32 + iscgo bool + cpuid_ecx uint32 + cpuid_edx uint32 + debug debugvars + signote note + forcegc forcegcstate + sched schedt + newprocs int32 + ) + + /* + * mutual exclusion locks. in the uncontended case, + * as fast as spin locks (just a few user-level instructions), + * but on the contention path they sleep in the kernel. + * a zeroed Mutex is unlocked (no need to initialize each lock). + */ + + /* + * sleep and wakeup on one-time events. + * before any calls to notesleep or notewakeup, + * must call noteclear to initialize the Note. + * then, exactly one thread can call notesleep + * and exactly one thread can call notewakeup (once). + * once notewakeup has been called, the notesleep + * will return. future notesleep will return immediately. 
+ * subsequent noteclear must be called only after + * previous notesleep has returned, e.g. it's disallowed + * to call noteclear straight after notewakeup. + * + * notetsleep is like notesleep but wakes up after + * a given number of nanoseconds even if the event + * has not yet happened. if a goroutine uses notetsleep to + * wake up early, it must wait to call noteclear until it + * can be sure that no other goroutine is calling + * notewakeup. + * + * notesleep/notetsleep are generally called on g0, + * notetsleepg is similar to notetsleep but is called on user g. + */ + // bool runtime·notetsleep(Note*, int64); // false - timeout + // bool runtime·notetsleepg(Note*, int64); // false - timeout + + /* + * Lock-free stack. + * Initialize uint64 head to 0, compare with 0 to test for emptiness. + * The stack does not keep pointers to nodes, + * so they can be garbage collected if there are no other pointers to nodes. + */ + + /* + * Parallel for over [0, n). + * body() is executed for each iteration. + * nthr - total number of worker threads. + * ctx - arbitrary user context. + * if wait=true, threads return from parfor() when all work is done; + * otherwise, threads can return while other threads are still finishing processing. + */ + + // for mmap, we only pass the lower 32 bits of file offset to the + // assembly routine; the higher bits (if required), should be provided + // by the assembly routine as 0. diff --cc src/runtime/stack1.go index 0000000000,40dfc76a6d..963f4fa731 mode 000000,100644..100644 --- a/src/runtime/stack1.go +++ b/src/runtime/stack1.go @@@ -1,0 -1,807 +1,818 @@@ + // Copyright 2013 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + package runtime + + import "unsafe" + + const ( + // StackDebug == 0: no logging + // == 1: logging of per-stack operations + // == 2: logging of per-frame operations + // == 3: logging of per-word updates + // == 4: logging of per-word reads + stackDebug = 0 + stackFromSystem = 0 // allocate stacks from system memory instead of the heap + stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free + stackPoisonCopy = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy + + stackCache = 1 + ) + + const ( + uintptrMask = 1<<(8*ptrSize) - 1 + poisonGC = uintptrMask & 0xf969696969696969 + poisonStack = uintptrMask & 0x6868686868686868 + + // Goroutine preemption request. + // Stored into g->stackguard0 to cause split stack check failure. + // Must be greater than any real sp. + // 0xfffffade in hex. + stackPreempt = uintptrMask & -1314 + + // Thread is forking. + // Stored into g->stackguard0 to cause split stack check failure. + // Must be greater than any real sp. + stackFork = uintptrMask & -1234 + ) + + // Global pool of spans that have free stacks. + // Stacks are assigned an order according to size. + // order = log_2(size/FixedStack) + // There is a free list for each order. + // TODO: one lock per order? + var stackpool [_NumStackOrders]mspan + var stackpoolmu mutex + + var stackfreequeue stack + + func stackinit() { + if _StackCacheSize&_PageMask != 0 { + gothrow("cache size must be a multiple of page size") + } + for i := range stackpool { + mSpanList_Init(&stackpool[i]) + } + } + + // Allocates a stack from the free pool. Must be called with + // stackpoolmu held. 
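
The stack allocator introduced above keeps one free list per size "order", with order = log_2(size/FixedStack); stackalloc and stackfree recompute that order with a shift loop. Here is a standalone sketch of the computation, using assumed stand-ins for the runtime-internal constants (_FixedStack taken as 2 KB, _NumStackOrders as 4) and the made-up helper name orderForSize.

    package main

    import "fmt"

    const (
    	fixedStack     = 2048 // assumed stand-in for _FixedStack
    	numStackOrders = 4    // assumed stand-in for _NumStackOrders
    )

    // orderForSize mirrors the shift loop in stackalloc/stackfree:
    // it returns log2(n/fixedStack) for a power-of-two stack size n.
    func orderForSize(n uint32) (order uint8, ok bool) {
    	if n&(n-1) != 0 || n < fixedStack {
    		return 0, false // stack sizes are always powers of two >= fixedStack
    	}
    	n2 := n
    	for n2 > fixedStack {
    		order++
    		n2 >>= 1
    	}
    	return order, order < numStackOrders
    }

    func main() {
    	for _, n := range []uint32{2048, 4096, 8192, 16384, 32768} {
    		o, cached := orderForSize(n)
    		fmt.Printf("size %5d -> order %d (served from per-order pool: %v)\n", n, o, cached)
    	}
    }
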
+ func stackpoolalloc(order uint8) *mlink { + list := &stackpool[order] + s := list.next + if s == list { + // no free stacks. Allocate another span worth. + s = mHeap_AllocStack(&mheap_, _StackCacheSize>>_PageShift) + if s == nil { + gothrow("out of memory") + } + if s.ref != 0 { + gothrow("bad ref") + } + if s.freelist != nil { + gothrow("bad freelist") + } + for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order { + x := (*mlink)(unsafe.Pointer(uintptr(s.start)<<_PageShift + i)) + x.next = s.freelist + s.freelist = x + } + mSpanList_Insert(list, s) + } + x := s.freelist + if x == nil { + gothrow("span has no free stacks") + } + s.freelist = x.next + s.ref++ + if s.freelist == nil { + // all stacks in s are allocated. + mSpanList_Remove(s) + } + return x + } + + // Adds stack x to the free pool. Must be called with stackpoolmu held. + func stackpoolfree(x *mlink, order uint8) { + s := mHeap_Lookup(&mheap_, (unsafe.Pointer)(x)) + if s.state != _MSpanStack { + gothrow("freeing stack not in a stack span") + } + if s.freelist == nil { + // s will now have a free stack + mSpanList_Insert(&stackpool[order], s) + } + x.next = s.freelist + s.freelist = x + s.ref-- + if s.ref == 0 { + // span is completely free - return to heap + mSpanList_Remove(s) + s.freelist = nil + mHeap_FreeStack(&mheap_, s) + } + } + + // stackcacherefill/stackcacherelease implement a global pool of stack segments. + // The pool is required to prevent unlimited growth of per-thread caches. + func stackcacherefill(c *mcache, order uint8) { + if stackDebug >= 1 { + print("stackcacherefill order=", order, "\n") + } + + // Grab some stacks from the global cache. + // Grab half of the allowed capacity (to prevent thrashing). + var list *mlink + var size uintptr + lock(&stackpoolmu) + for size < _StackCacheSize/2 { + x := stackpoolalloc(order) + x.next = list + list = x + size += _FixedStack << order + } + unlock(&stackpoolmu) + c.stackcache[order].list = list + c.stackcache[order].size = size + } + + func stackcacherelease(c *mcache, order uint8) { + if stackDebug >= 1 { + print("stackcacherelease order=", order, "\n") + } + x := c.stackcache[order].list + size := c.stackcache[order].size + lock(&stackpoolmu) + for size > _StackCacheSize/2 { + y := x.next + stackpoolfree(x, order) + x = y + size -= _FixedStack << order + } + unlock(&stackpoolmu) + c.stackcache[order].list = x + c.stackcache[order].size = size + } + + func stackcache_clear(c *mcache) { + if stackDebug >= 1 { + print("stackcache clear\n") + } + lock(&stackpoolmu) + for order := uint8(0); order < _NumStackOrders; order++ { + x := c.stackcache[order].list + for x != nil { + y := x.next + stackpoolfree(x, order) + x = y + } + c.stackcache[order].list = nil + c.stackcache[order].size = 0 + } + unlock(&stackpoolmu) + } + + func stackalloc(n uint32) stack { + // Stackalloc must be called on scheduler stack, so that we + // never try to grow the stack during the code that stackalloc runs. + // Doing so would cause a deadlock (issue 1547). + thisg := getg() + if thisg != thisg.m.g0 { + gothrow("stackalloc not on scheduler stack") + } + if n&(n-1) != 0 { + gothrow("stack size not a power of 2") + } + if stackDebug >= 1 { + print("stackalloc ", n, "\n") + } + + if debug.efence != 0 || stackFromSystem != 0 { + v := sysAlloc(round(uintptr(n), _PageSize), &memstats.stacks_sys) + if v == nil { + gothrow("out of memory (stackalloc)") + } + return stack{uintptr(v), uintptr(v) + uintptr(n)} + } + + // Small stacks are allocated with a fixed-size free-list allocator. 
+ // If we need a stack of a bigger size, we fall back on allocating + // a dedicated span. + var v unsafe.Pointer + if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize { + order := uint8(0) + n2 := n + for n2 > _FixedStack { + order++ + n2 >>= 1 + } + var x *mlink + c := thisg.m.mcache + if c == nil || thisg.m.gcing != 0 || thisg.m.helpgc != 0 { + // c == nil can happen in the guts of exitsyscall or + // procresize. Just get a stack from the global pool. + // Also don't touch stackcache during gc + // as it's flushed concurrently. + lock(&stackpoolmu) + x = stackpoolalloc(order) + unlock(&stackpoolmu) + } else { + x = c.stackcache[order].list + if x == nil { + stackcacherefill(c, order) + x = c.stackcache[order].list + } + c.stackcache[order].list = x.next + c.stackcache[order].size -= uintptr(n) + } + v = (unsafe.Pointer)(x) + } else { + s := mHeap_AllocStack(&mheap_, round(uintptr(n), _PageSize)>>_PageShift) + if s == nil { + gothrow("out of memory") + } + v = (unsafe.Pointer)(s.start << _PageShift) + } + + if raceenabled { + racemalloc(v, uintptr(n)) + } + if stackDebug >= 1 { + print(" allocated ", v, "\n") + } + return stack{uintptr(v), uintptr(v) + uintptr(n)} + } + + func stackfree(stk stack) { + gp := getg() + n := stk.hi - stk.lo + v := (unsafe.Pointer)(stk.lo) + if n&(n-1) != 0 { + gothrow("stack not a power of 2") + } + if stackDebug >= 1 { + println("stackfree", v, n) + memclr(v, n) // for testing, clobber stack data + } + if debug.efence != 0 || stackFromSystem != 0 { + if debug.efence != 0 || stackFaultOnFree != 0 { + sysFault(v, n) + } else { + sysFree(v, n, &memstats.stacks_sys) + } + return + } + if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize { + order := uint8(0) + n2 := n + for n2 > _FixedStack { + order++ + n2 >>= 1 + } + x := (*mlink)(v) + c := gp.m.mcache + if c == nil || gp.m.gcing != 0 || gp.m.helpgc != 0 { + lock(&stackpoolmu) + stackpoolfree(x, order) + unlock(&stackpoolmu) + } else { + if c.stackcache[order].size >= _StackCacheSize { + stackcacherelease(c, order) + } + x.next = c.stackcache[order].list + c.stackcache[order].list = x + c.stackcache[order].size += n + } + } else { + s := mHeap_Lookup(&mheap_, v) + if s.state != _MSpanStack { + println(hex(s.start<<_PageShift), v) + gothrow("bad span state") + } + mHeap_FreeStack(&mheap_, s) + } + } + + var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real + + var mapnames = []string{ + _BitsDead: "---", + _BitsScalar: "scalar", + _BitsPointer: "ptr", + } + + // Stack frame layout + // + // (x86) + // +------------------+ + // | args from caller | + // +------------------+ <- frame->argp + // | return address | + // +------------------+ <- frame->varp + // | locals | + // +------------------+ + // | args to callee | + // +------------------+ <- frame->sp + // + // (arm) + // +------------------+ + // | args from caller | + // +------------------+ <- frame->argp + // | caller's retaddr | + // +------------------+ <- frame->varp + // | locals | + // +------------------+ + // | args to callee | + // +------------------+ + // | return address | + // +------------------+ <- frame->sp + + type adjustinfo struct { + old stack + delta uintptr // ptr distance from old to new stack (newbase - oldbase) + } + + // Adjustpointer checks whether *vpp is in the old stack described by adjinfo. + // If so, it rewrites *vpp to point into the new stack. 
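
adjustinfo above carries the old stack bounds together with the distance from the old stack to the new one (new.hi - old.hi in copystack), and adjustpointer, defined just below, plus the other adjust* helpers all reduce to the same test: if a word points into [old.lo, old.hi), slide it by delta, otherwise leave it alone. A toy illustration with made-up addresses follows; stackBounds and adjust are hypothetical names, and because uintptr arithmetic wraps, the same code also covers a shrink, where delta is effectively negative.

    package main

    import "fmt"

    // stackBounds is a stand-in for the runtime's stack{lo, hi}.
    type stackBounds struct{ lo, hi uintptr }

    // adjust mirrors the core test: pointers into [old.lo, old.hi)
    // move by delta; everything else (heap, globals) is left alone.
    func adjust(p uintptr, old stackBounds, delta uintptr) uintptr {
    	if old.lo <= p && p < old.hi {
    		return p + delta
    	}
    	return p
    }

    func main() {
    	old := stackBounds{lo: 0xc0000000, hi: 0xc0002000}    // hypothetical 8 KB stack
    	newStk := stackBounds{lo: 0xc0100000, hi: 0xc0104000} // hypothetical 16 KB stack
    	delta := newStk.hi - old.hi

    	fmt.Printf("%#x\n", adjust(0xc0001f00, old, delta)) // in the old stack: moved
    	fmt.Printf("%#x\n", adjust(0xc0200000, old, delta)) // elsewhere: unchanged
    }
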
+ func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) { + pp := (*unsafe.Pointer)(vpp) + p := *pp + if stackDebug >= 4 { + print(" ", pp, ":", p, "\n") + } + if adjinfo.old.lo <= uintptr(p) && uintptr(p) < adjinfo.old.hi { + *pp = add(p, adjinfo.delta) + if stackDebug >= 3 { + print(" adjust ptr ", pp, ":", p, " -> ", *pp, "\n") + } + } + } + + type gobitvector struct { + n uintptr + bytedata []uint8 + } + + func gobv(bv bitvector) gobitvector { + return gobitvector{ + uintptr(bv.n), + (*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8], + } + } + + func ptrbits(bv *gobitvector, i uintptr) uint8 { + return (bv.bytedata[i/4] >> ((i & 3) * 2)) & 3 + } + + // bv describes the memory starting at address scanp. + // Adjust any pointers contained therein. + func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f *_func) { + bv := gobv(*cbv) + minp := adjinfo.old.lo + maxp := adjinfo.old.hi + delta := adjinfo.delta + num := uintptr(bv.n / _BitsPerPointer) + for i := uintptr(0); i < num; i++ { + if stackDebug >= 4 { + print(" ", add(scanp, i*ptrSize), ":", mapnames[ptrbits(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n") + } + switch ptrbits(&bv, i) { + default: + gothrow("unexpected pointer bits") + case _BitsDead: + if debug.gcdead != 0 { + *(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(uintptr(poisonStack)) + } + case _BitsScalar: + // ok + case _BitsPointer: + p := *(*unsafe.Pointer)(add(scanp, i*ptrSize)) + up := uintptr(p) + if f != nil && 0 < up && up < _PageSize && invalidptr != 0 || up == poisonGC || up == poisonStack { + // Looks like a junk value in a pointer slot. + // Live analysis wrong? + getg().m.traceback = 2 + print("runtime: bad pointer in frame ", gofuncname(f), " at ", add(scanp, i*ptrSize), ": ", p, "\n") + gothrow("invalid stack pointer") + } + if minp <= up && up < maxp { + if stackDebug >= 3 { + print("adjust ptr ", p, " ", gofuncname(f), "\n") + } + *(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(up + delta) + } + } + } + } + + // Note: the argument/return area is adjusted by the callee. + func adjustframe(frame *stkframe, arg unsafe.Pointer) bool { + adjinfo := (*adjustinfo)(arg) + targetpc := frame.continpc + if targetpc == 0 { + // Frame is dead. + return true + } + f := frame.fn + if stackDebug >= 2 { + print(" adjusting ", funcname(f), " frame=[", hex(frame.sp), ",", hex(frame.fp), "] pc=", hex(frame.pc), " continpc=", hex(frame.continpc), "\n") + } + if f.entry == systemstack_switchPC { + // A special routine at the bottom of stack of a goroutine that does an systemstack call. + // We will allow it to be copied even though we don't + // have full GC info for it (because it is written in asm). + return true + } + if targetpc != f.entry { + targetpc-- + } + pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc) + if pcdata == -1 { + pcdata = 0 // in prologue + } + + // Adjust local variables if stack frame has been allocated. + size := frame.varp - frame.sp + var minsize uintptr + if thechar != '6' && thechar != '8' { + minsize = ptrSize + } else { + minsize = 0 + } + if size > minsize { + var bv bitvector + stackmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) + if stackmap == nil || stackmap.n <= 0 { + print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n") + gothrow("missing stackmap") + } + // Locals bitmap information, scan just the pointers in locals. 
+ if pcdata < 0 || pcdata >= stackmap.n { + // don't know where we are + print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n") + gothrow("bad symbol table") + } + bv = stackmapdata(stackmap, pcdata) + size = (uintptr(bv.n) * ptrSize) / _BitsPerPointer + if stackDebug >= 3 { + print(" locals ", pcdata, "/", stackmap.n, " ", size/ptrSize, " words ", bv.bytedata, "\n") + } + adjustpointers(unsafe.Pointer(frame.varp-size), &bv, adjinfo, f) + } + + // Adjust arguments. + if frame.arglen > 0 { + var bv bitvector + if frame.argmap != nil { + bv = *frame.argmap + } else { + stackmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps)) + if stackmap == nil || stackmap.n <= 0 { + print("runtime: frame ", funcname(f), " untyped args ", frame.argp, "+", uintptr(frame.arglen), "\n") + gothrow("missing stackmap") + } + if pcdata < 0 || pcdata >= stackmap.n { + // don't know where we are + print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n") + gothrow("bad symbol table") + } + bv = stackmapdata(stackmap, pcdata) + } + if stackDebug >= 3 { + print(" args\n") + } + adjustpointers(unsafe.Pointer(frame.argp), &bv, adjinfo, nil) + } + return true + } + + func adjustctxt(gp *g, adjinfo *adjustinfo) { + adjustpointer(adjinfo, (unsafe.Pointer)(&gp.sched.ctxt)) + } + + func adjustdefers(gp *g, adjinfo *adjustinfo) { + // Adjust defer argument blocks the same way we adjust active stack frames. + tracebackdefers(gp, adjustframe, noescape(unsafe.Pointer(adjinfo))) + + // Adjust pointers in the Defer structs. + // Defer structs themselves are never on the stack. + for d := gp._defer; d != nil; d = d.link { + adjustpointer(adjinfo, (unsafe.Pointer)(&d.fn)) + adjustpointer(adjinfo, (unsafe.Pointer)(&d.argp)) + adjustpointer(adjinfo, (unsafe.Pointer)(&d._panic)) + } + } + + func adjustpanics(gp *g, adjinfo *adjustinfo) { + // Panics are on stack and already adjusted. + // Update pointer to head of list in G. + adjustpointer(adjinfo, (unsafe.Pointer)(&gp._panic)) + } + + func adjustsudogs(gp *g, adjinfo *adjustinfo) { + // the data elements pointed to by a SudoG structure + // might be in the stack. + for s := gp.waiting; s != nil; s = s.waitlink { + adjustpointer(adjinfo, (unsafe.Pointer)(&s.elem)) + adjustpointer(adjinfo, (unsafe.Pointer)(&s.selectdone)) + } + } + + func fillstack(stk stack, b byte) { + for p := stk.lo; p < stk.hi; p++ { + *(*byte)(unsafe.Pointer(p)) = b + } + } + + // Copies gp's stack to a new stack of a different size. ++// Caller must have changed gp status to Gcopystack. 
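
The requirement spelled out above, that the caller must already have moved gp into the new _Gcopystack status before copystack runs, is what keeps the concurrent collector from scanning a stack that is halfway copied: the status change is a compare-and-swap on the g's status word, so the copier and the scanner cannot both own the goroutine at once. Below is a toy model of that guard, with made-up status constants and a bool-returning casStatus; the real readgstatus/casgstatus are runtime-internal and spin or throw instead of returning false.

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    // Toy status values standing in for the runtime's _Gwaiting,
    // _Grunnable, _Gcopystack, ... constants.
    const (
    	statusWaiting uint32 = iota + 1
    	statusRunnable
    	statusCopystack
    )

    type toyG struct {
    	status uint32
    }

    // casStatus models the status guard: the status only moves
    // oldS -> newS atomically, so two parties cannot both claim the g.
    func casStatus(gp *toyG, oldS, newS uint32) bool {
    	return atomic.CompareAndSwapUint32(&gp.status, oldS, newS)
    }

    func main() {
    	gp := &toyG{status: statusWaiting}

    	// Park the goroutine in the copy state before touching its stack.
    	if !casStatus(gp, statusWaiting, statusCopystack) {
    		fmt.Println("lost the race: someone else owns the status")
    		return
    	}
    	fmt.Println("copying stack while status =", atomic.LoadUint32(&gp.status))
    	// ... the copystack-equivalent work would happen here ...
    	casStatus(gp, statusCopystack, statusWaiting) // restore the old status
    	fmt.Println("done, status restored to", atomic.LoadUint32(&gp.status))
    }
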
+ func copystack(gp *g, newsize uintptr) {
+ if gp.syscallsp != 0 {
+ gothrow("stack growth not allowed in system call")
+ }
+ old := gp.stack
+ if old.lo == 0 {
+ gothrow("nil stackbase")
+ }
+ used := old.hi - gp.sched.sp
+
+ // allocate new stack
+ new := stackalloc(uint32(newsize))
+ if stackPoisonCopy != 0 {
+ fillstack(new, 0xfd)
+ }
+ if stackDebug >= 1 {
+ print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]/", old.hi-old.lo, " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
+ }
+
+ // adjust pointers in the to-be-copied frames
+ var adjinfo adjustinfo
+ adjinfo.old = old
+ adjinfo.delta = new.hi - old.hi
+ gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, adjustframe, noescape(unsafe.Pointer(&adjinfo)), 0)
+
+ // adjust other miscellaneous things that have pointers into stacks.
+ adjustctxt(gp, &adjinfo)
+ adjustdefers(gp, &adjinfo)
+ adjustpanics(gp, &adjinfo)
+ adjustsudogs(gp, &adjinfo)
+
+ // copy the stack to the new location
+ if stackPoisonCopy != 0 {
+ fillstack(new, 0xfb)
+ }
+ memmove(unsafe.Pointer(new.hi-used), unsafe.Pointer(old.hi-used), used)
+
- oldstatus := readgstatus(gp)
- oldstatus &^= _Gscan
- if oldstatus == _Gwaiting || oldstatus == _Grunnable {
- casgstatus(gp, oldstatus, _Gcopystack) // oldstatus is Gwaiting or Grunnable
- } else {
- gothrow("copystack: bad status, not Gwaiting or Grunnable")
- }
-
+ // Swap out old stack for new one
+ gp.stack = new
+ gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request
+ gp.sched.sp = new.hi - used
+
- casgstatus(gp, _Gcopystack, oldstatus) // oldstatus is Gwaiting or Grunnable
-
+ // free old stack
+ if stackPoisonCopy != 0 {
+ fillstack(old, 0xfc)
+ }
+ if newsize > old.hi-old.lo {
+ // growing, free stack immediately
+ stackfree(old)
+ } else {
+ // shrinking, queue up free operation. We can't actually free the stack
+ // just yet because we might run into the following situation:
+ // 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
+ // 2) The stack that pointer points to is shrunk
+ // 3) The old stack is freed
+ // 4) The containing span is marked free
+ // 5) GC attempts to mark the SudoG.elem pointer. The marking fails because
+ // the pointer looks like a pointer into a free span.
+ // By not freeing, we prevent step #4 until GC is done.
+ lock(&stackpoolmu)
+ *(*stack)(unsafe.Pointer(old.lo)) = stackfreequeue
+ stackfreequeue = old
+ unlock(&stackpoolmu)
+ }
+ }
+
+ // round x up to a power of 2.
+ func round2(x int32) int32 {
+ s := uint(0)
+ for 1<<s < x {
+ s++
+ }
+ return 1 << s
+ }
+
+ // Called from runtime·morestack when more stack is needed.
+ // Allocate larger stack and relocate to new stack.
+ // Stack growth is multiplicative, for constant amortized cost.
+ //
+ // g->atomicstatus will be Grunning or Gscanrunning upon entry.
+ // If the GC is trying to stop this g then it will set preemptscan to true.
+ func newstack() {
+ thisg := getg()
+ // TODO: double check all gp. shouldn't be getg().
+ if thisg.m.morebuf.g.stackguard0 == stackFork {
+ gothrow("stack growth after fork")
+ }
+ if thisg.m.morebuf.g != thisg.m.curg {
+ print("runtime: newstack called from g=", thisg.m.morebuf.g, "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n")
+ morebuf := thisg.m.morebuf
+ traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g)
+ gothrow("runtime: wrong goroutine in newstack")
+ }
+ if thisg.m.curg.throwsplit {
+ gp := thisg.m.curg
+ // Update syscallsp, syscallpc in case traceback uses them.
+ morebuf := thisg.m.morebuf + gp.syscallsp = morebuf.sp + gp.syscallpc = morebuf.pc + print("runtime: newstack sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n", + "\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n", + "\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n") + gothrow("runtime: stack split at bad time") + } + + // The goroutine must be executing in order to call newstack, + // so it must be Grunning or Gscanrunning. + + gp := thisg.m.curg + morebuf := thisg.m.morebuf + thisg.m.morebuf.pc = 0 + thisg.m.morebuf.lr = 0 + thisg.m.morebuf.sp = 0 + thisg.m.morebuf.g = nil + + casgstatus(gp, _Grunning, _Gwaiting) + gp.waitreason = "stack growth" + + rewindmorestack(&gp.sched) + + if gp.stack.lo == 0 { + gothrow("missing stack in newstack") + } + sp := gp.sched.sp + if thechar == '6' || thechar == '8' { + // The call to morestack cost a word. + sp -= ptrSize + } + if stackDebug >= 1 || sp < gp.stack.lo { + print("runtime: newstack sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n", + "\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n", + "\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n") + } + if sp < gp.stack.lo { + print("runtime: gp=", gp, ", gp->status=", hex(readgstatus(gp)), "\n ") + print("runtime: split stack overflow: ", hex(sp), " < ", hex(gp.stack.lo), "\n") + gothrow("runtime: split stack overflow") + } + ++ if gp.sched.ctxt != nil { ++ // morestack wrote sched.ctxt on its way in here, ++ // without a write barrier. Run the write barrier now. ++ // It is not possible to be preempted between then ++ // and now, so it's okay. ++ writebarrierptr_nostore((*uintptr)(unsafe.Pointer(&gp.sched.ctxt)), uintptr(gp.sched.ctxt)) ++ } ++ + if gp.stackguard0 == stackPreempt { + if gp == thisg.m.g0 { + gothrow("runtime: preempt g0") + } + if thisg.m.p == nil && thisg.m.locks == 0 { + gothrow("runtime: g is running but p is not") + } + if gp.preemptscan { + gcphasework(gp) + casgstatus(gp, _Gwaiting, _Grunning) + gp.stackguard0 = gp.stack.lo + _StackGuard + gp.preempt = false + gp.preemptscan = false // Tells the GC premption was successful. + gogo(&gp.sched) // never return + } + + // Be conservative about where we preempt. + // We are interested in preempting user Go code, not runtime code. + if thisg.m.locks != 0 || thisg.m.mallocing != 0 || thisg.m.gcing != 0 || thisg.m.p.status != _Prunning { + // Let the goroutine keep running for now. + // gp->preempt is set, so it will be preempted next time. + gp.stackguard0 = gp.stack.lo + _StackGuard + casgstatus(gp, _Gwaiting, _Grunning) + gogo(&gp.sched) // never return + } + + // Act like goroutine called runtime.Gosched. + casgstatus(gp, _Gwaiting, _Grunning) + gosched_m(gp) // never return + } + + // Allocate a bigger segment and move the stack. + oldsize := int(gp.stack.hi - gp.stack.lo) + newsize := oldsize * 2 + if uintptr(newsize) > maxstacksize { + print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n") + gothrow("stack overflow") + } + - // Note that the concurrent GC might be scanning the stack as we try to replace it. - // copystack takes care of the appropriate coordination with the stack scanner. 
++ oldstatus := readgstatus(gp) ++ oldstatus &^= _Gscan ++ casgstatus(gp, oldstatus, _Gcopystack) // oldstatus is Gwaiting or Grunnable ++ ++ // The concurrent GC will not scan the stack while we are doing the copy since ++ // the gp is in a Gcopystack status. + copystack(gp, uintptr(newsize)) + if stackDebug >= 1 { + print("stack grow done\n") + } - casgstatus(gp, _Gwaiting, _Grunning) ++ casgstatus(gp, _Gcopystack, _Grunning) + gogo(&gp.sched) + } + + //go:nosplit + func nilfunc() { + *(*uint8)(nil) = 0 + } + + // adjust Gobuf as if it executed a call to fn + // and then did an immediate gosave. + func gostartcallfn(gobuf *gobuf, fv *funcval) { + var fn unsafe.Pointer + if fv != nil { + fn = (unsafe.Pointer)(fv.fn) + } else { + fn = unsafe.Pointer(funcPC(nilfunc)) + } + gostartcall(gobuf, fn, (unsafe.Pointer)(fv)) + } + + // Maybe shrink the stack being used by gp. + // Called at garbage collection time. + func shrinkstack(gp *g) { + if readgstatus(gp) == _Gdead { + if gp.stack.lo != 0 { + // Free whole stack - it will get reallocated + // if G is used again. + stackfree(gp.stack) + gp.stack.lo = 0 + gp.stack.hi = 0 + } + return + } + if gp.stack.lo == 0 { + gothrow("missing stack in shrinkstack") + } + + oldsize := gp.stack.hi - gp.stack.lo + newsize := oldsize / 2 + if newsize < _FixedStack { + return // don't shrink below the minimum-sized stack + } + used := gp.stack.hi - gp.sched.sp + if used >= oldsize/4 { + return // still using at least 1/4 of the segment. + } + + // We can't copy the stack if we're in a syscall. + // The syscall might have pointers into the stack. + if gp.syscallsp != 0 { + return + } - - /* TODO - if _Windows && gp.m != nil && gp.m.libcallsp != 0 { ++ if _Windows != 0 && gp.m != nil && gp.m.libcallsp != 0 { + return + } - */ + + if stackDebug > 0 { + print("shrinking stack ", oldsize, "->", newsize, "\n") + } ++ ++ // This is being done in a Gscan state and was initiated by the GC so no need to move to ++ // the Gcopystate. ++ // The world is stopped, so the goroutine must be Gwaiting or Grunnable, ++ // and what it is is not changing underfoot. ++ oldstatus := readgstatus(gp) &^ _Gscan ++ if oldstatus != _Gwaiting && oldstatus != _Grunnable { ++ gothrow("status is not Gwaiting or Grunnable") ++ } ++ casgstatus(gp, oldstatus, _Gcopystack) + copystack(gp, newsize) ++ casgstatus(gp, _Gcopystack, oldstatus) + } + + // Do any delayed stack freeing that was queued up during GC. + func shrinkfinish() { + lock(&stackpoolmu) + s := stackfreequeue + stackfreequeue = stack{} + unlock(&stackpoolmu) + for s.lo != 0 { + t := *(*stack)(unsafe.Pointer(s.lo)) + stackfree(s) + s = t + } + } + + //go:nosplit + func morestackc() { + systemstack(func() { + gothrow("attempt to execute C code on Go stack") + }) + } diff --cc src/runtime/vdso_none.go index 0000000000,ac6f8cb18d..6f83ecc895 mode 000000,100644..100644 --- a/src/runtime/vdso_none.go +++ b/src/runtime/vdso_none.go @@@ -1,0 -1,10 +1,11 @@@ + // Copyright 2014 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + + // +build !linux !amd64 ++// +build !linux !386 + + package runtime + + func sysargs(argc int32, argv **byte) { + }
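
In the vdso_none.go hunk just above, the terms on one +build line are OR'ed and separate +build lines are AND'ed, so adding "// +build !linux !386" narrows the empty sysargs stub to every platform except linux/amd64 and linux/386, which must get sysargs from their own files. A quick check of that boolean, with vdsoNoneBuilds as a made-up helper name:

    package main

    import "fmt"

    // vdsoNoneBuilds evaluates the two +build lines on vdso_none.go:
    //   +build !linux !amd64
    //   +build !linux !386
    // Terms on one line are OR'ed, separate lines are AND'ed.
    func vdsoNoneBuilds(goos, goarch string) bool {
    	line1 := goos != "linux" || goarch != "amd64"
    	line2 := goos != "linux" || goarch != "386"
    	return line1 && line2
    }

    func main() {
    	for _, p := range [][2]string{
    		{"linux", "amd64"}, {"linux", "386"}, {"linux", "arm"}, {"darwin", "amd64"},
    	} {
    		fmt.Printf("%s/%s -> %v\n", p[0], p[1], vdsoNoneBuilds(p[0], p[1]))
    	}
    	// Prints false for linux/amd64 and linux/386, true everywhere else.
    }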