From: Dmitry Vyukov
Date: Tue, 3 Nov 2015 11:19:15 +0000 (+0100)
Subject: runtime: fix finalization and profiling of tiny allocations
X-Git-Tag: go1.6beta1~646
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=bf606094ee0671cdd776aea43aabb842b6c1e3af;p=gostls13.git

runtime: fix finalization and profiling of tiny allocations

Handling of special records for tiny allocations has two problems:

1. Once we queue a finalizer we mark the object. As a result, any
subsequent finalizers for the same object will not be queued during
this GC cycle. If we have 16 finalizers set up (the worst case),
finalization will take 16 GC cycles. This is what caused the
misbehavior of tinyfin.go. The actual flakiness was caused by the fact
that fing is asynchronous and does not always run before the check.

2. If a tiny block has both finalizer and profile specials, it is
possible that we queue the finalizer and keep the object alive, yet
still free the profile record. As a result, the heap profile can be
skewed.

Fix both issues by analyzing all special records for a single object
at once.

Also, make the tinyfin test stricter and remove its reliance on real
time, and add a test for problem 2. Currently the heap profile misses
about half of the live memory.

Fixes #13100

Change-Id: I9ae4dc1c44893724138a4565ca5cae29f2e97544
Reviewed-on: https://go-review.googlesource.com/16591
Reviewed-by: Austin Clements
Reviewed-by: Keith Randall
Run-TryBot: Dmitry Vyukov
---

diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 7c7f1e858b..02d3d6078b 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -197,6 +197,13 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
 	}
 
 	// Unlink & free special records for any objects we're about to free.
+	// Two complications here:
+	// 1. An object can have both finalizer and profile special records.
+	//    In such case we need to queue finalizer for execution,
+	//    mark the object as live and preserve the profile special.
+	// 2. A tiny object can have several finalizers setup for different offsets.
+	//    If such object is not marked, we need to queue all finalizers at once.
+	// Both 1 and 2 are possible at the same time.
 	specialp := &s.specials
 	special := *specialp
 	for special != nil {
@@ -204,16 +211,35 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
 		p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
 		hbits := heapBitsForAddr(p)
 		if !hbits.isMarked() {
-			// Find the exact byte for which the special was setup
-			// (as opposed to object beginning).
-			p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
-			// about to free object: splice out special record
-			y := special
-			special = special.next
-			*specialp = special
-			if !freespecial(y, unsafe.Pointer(p), size, false) {
-				// stop freeing of object if it has a finalizer
-				hbits.setMarkedNonAtomic()
+			// This object is not marked and has at least one special record.
+			// Pass 1: see if it has at least one finalizer.
+			hasFin := false
+			endOffset := p - uintptr(s.start<<_PageShift) + size
+			for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
+				if tmp.kind == _KindSpecialFinalizer {
+					// Stop freeing of object if it has a finalizer.
+					hbits.setMarkedNonAtomic()
+					hasFin = true
+					break
+				}
+			}
+			// Pass 2: queue all finalizers _or_ handle profile record.
+			for special != nil && uintptr(special.offset) < endOffset {
+				// Find the exact byte for which the special was setup
+				// (as opposed to object beginning).
+				p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
+				if special.kind == _KindSpecialFinalizer || !hasFin {
+					// Splice out special record.
+					y := special
+					special = special.next
+					*specialp = special
+					freespecial(y, unsafe.Pointer(p), size, false)
+				} else {
+					// This is profile record, but the object has finalizers (so kept alive).
+					// Keep special record.
+					specialp = &special.next
+					special = *specialp
+				}
+			}
 			}
 		} else {
 			// object is still live: keep special record
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 36e895de31..4f01aa7505 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -1143,8 +1143,7 @@ func setprofilebucket(p unsafe.Pointer, b *bucket) {
 
 // Do whatever cleanup needs to be done to deallocate s. It has
 // already been unlinked from the MSpan specials list.
-// Returns true if we should keep working on deallocating p.
-func freespecial(s *special, p unsafe.Pointer, size uintptr, freed bool) bool {
+func freespecial(s *special, p unsafe.Pointer, size uintptr, freed bool) {
 	switch s.kind {
 	case _KindSpecialFinalizer:
 		sf := (*specialfinalizer)(unsafe.Pointer(s))
@@ -1152,14 +1151,12 @@ func setprofilebucket(p unsafe.Pointer, b *bucket) {
 		lock(&mheap_.speciallock)
 		fixAlloc_Free(&mheap_.specialfinalizeralloc, unsafe.Pointer(sf))
 		unlock(&mheap_.speciallock)
-		return false // don't free p until finalizer is done
 	case _KindSpecialProfile:
 		sp := (*specialprofile)(unsafe.Pointer(s))
 		mProf_Free(sp.b, size, freed)
 		lock(&mheap_.speciallock)
 		fixAlloc_Free(&mheap_.specialprofilealloc, unsafe.Pointer(sp))
 		unlock(&mheap_.speciallock)
-		return true
 	default:
 		throw("bad special kind")
 		panic("not reached")
diff --git a/test/finprofiled.go b/test/finprofiled.go
new file mode 100644
index 0000000000..0eb801a4bd
--- /dev/null
+++ b/test/finprofiled.go
@@ -0,0 +1,74 @@
+// run
+
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that tiny allocations with finalizers are correctly profiled.
+// Previously profile special records could have been processed prematurely
+// (while the object is still live).
+
+package main
+
+import (
+	"runtime"
+	"time"
+	"unsafe"
+)
+
+func main() {
+	runtime.MemProfileRate = 1
+	// Allocate 1M 4-byte objects and set a finalizer for every third object.
+	// Assuming that tiny block size is 16, some objects get finalizers setup
+	// only for middle bytes. The finalizer resurrects that object.
+	// As the result, all allocated memory must stay alive.
+	const (
+		N             = 1 << 20
+		tinyBlockSize = 16 // runtime._TinySize
+	)
+	hold := make([]*int32, 0, N)
+	for i := 0; i < N; i++ {
+		x := new(int32)
+		if i%3 == 0 {
+			runtime.SetFinalizer(x, func(p *int32) {
+				hold = append(hold, p)
+			})
+		}
+	}
+	// Finalize as much as possible.
+	// Note: the sleep only increases probability of bug detection,
+	// it cannot lead to false failure.
+	for i := 0; i < 5; i++ {
+		runtime.GC()
+		time.Sleep(10 * time.Millisecond)
+	}
+	// Read memory profile.
+	var prof []runtime.MemProfileRecord
+	for {
+		if n, ok := runtime.MemProfile(prof, false); ok {
+			prof = prof[:n]
+			break
+		} else {
+			prof = make([]runtime.MemProfileRecord, n+10)
+		}
+	}
+	// See how much memory in tiny objects is profiled.
+	var totalBytes int64
+	for _, p := range prof {
+		bytes := p.AllocBytes - p.FreeBytes
+		nobj := p.AllocObjects - p.FreeObjects
+		size := bytes / nobj
+		if size == tinyBlockSize {
+			totalBytes += bytes
+		}
+	}
+	// 2*tinyBlockSize slack is for any boundary effects.
+	if want := N*int64(unsafe.Sizeof(int32(0))) - 2*tinyBlockSize; totalBytes < want {
+		println("got", totalBytes, "want >=", want)
+		panic("some of the tiny objects are not profiled")
+	}
+	// Just to keep hold alive.
+	if len(hold) != 0 && hold[0] == nil {
+		panic("bad")
+	}
+}
diff --git a/test/tinyfin.go b/test/tinyfin.go
index d9ffa7cab2..5171dfc72e 100644
--- a/test/tinyfin.go
+++ b/test/tinyfin.go
@@ -10,7 +10,6 @@ package main
 
 import (
 	"runtime"
-	"sync/atomic"
 	"time"
 )
 
@@ -20,39 +19,46 @@ func main() {
 	if runtime.Compiler == "gccgo" {
 		return
 	}
-	N := int32(100)
-	count := N
-	done := make([]bool, N)
-	for i := int32(0); i < N; i++ {
+	const N = 100
+	finalized := make(chan int32, N)
+	for i := 0; i < N; i++ {
 		x := new(int32) // subject to tiny alloc
-		*x = i
+		*x = int32(i)
 		// the closure must be big enough to be combined
 		runtime.SetFinalizer(x, func(p *int32) {
+			finalized <- *p
+		})
+	}
+	runtime.GC()
+	count := 0
+	done := make([]bool, N)
+	timeout := time.After(5*time.Second)
+	for {
+		select {
+		case <-timeout:
+			println("timeout,", count, "finalized so far")
+			panic("not all finalizers are called")
+		case x := <-finalized:
			// Check that p points to the correct subobject of the tiny allocation.
 			// It's a bit tricky, because we can't capture another variable
 			// with the expected value (it would be combined as well).
-			if *p < 0 || *p >= N {
-				println("got", *p)
+			if x < 0 || x >= N {
+				println("got", x)
 				panic("corrupted")
 			}
-			if done[*p] {
-				println("got", *p)
+			if done[x] {
+				println("got", x)
 				panic("already finalized")
 			}
-			done[*p] = true
-			atomic.AddInt32(&count, -1)
-		})
-	}
-	for i := 0; i < 4; i++ {
-		runtime.GC()
-		time.Sleep(10 * time.Millisecond)
-	}
-	// Some of the finalizers may not be executed,
-	// if the outermost allocations are combined with something persistent.
-	// Currently 4 int32's are combined into a 16-byte block,
-	// ensure that most of them are finalized.
-	if atomic.LoadInt32(&count) >= N/4 {
-		println(count, "out of", N, "finalizer are not called")
-		panic("not all finalizers are called")
+			done[x] = true
+			count++
+			if count > N/10*9 {
+				// Some of the finalizers may not be executed,
+				// if the outermost allocations are combined with something persistent.
+				// Currently 4 int32's are combined into a 16-byte block,
+				// ensure that most of them are finalized.
+				return
+			}
+		}
 	}
 }
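
To make the new sweep logic easier to follow, here is a small standalone sketch of the two-pass scan described in the mgcsweep.go hunk above. It is illustrative only: the type special, the kinds kindFinalizer/kindProfile, and the function sweepObjectSpecials are simplified stand-ins invented for this sketch, not runtime APIs; the real code walks an mspan's specials list with _KindSpecialFinalizer and _KindSpecialProfile records and also updates the heap bitmap, which is omitted here.

package main

import "fmt"

// kind distinguishes the two special-record kinds relevant here.
type kind int

const (
	kindFinalizer kind = iota
	kindProfile
)

// special is a simplified stand-in for the runtime's special record:
// an offset inside the tiny block plus a kind, linked in offset order.
type special struct {
	offset uintptr
	kind   kind
	next   *special
}

// sweepObjectSpecials models the two-pass scan for one unmarked object
// covering offsets [0, objEnd). It reports whether a finalizer was found,
// in which case the object must be kept alive. Finalizer records are
// spliced out and "queued"; a profile record is processed only when the
// object really dies, and is kept on the list otherwise.
func sweepObjectSpecials(head **special, objEnd uintptr) (hasFin bool) {
	// Pass 1: if any record in range is a finalizer, the object stays alive.
	for s := *head; s != nil && s.offset < objEnd; s = s.next {
		if s.kind == kindFinalizer {
			hasFin = true
			break
		}
	}
	// Pass 2: queue every finalizer; free the profile record only when no
	// finalizer resurrected the object.
	for s := *head; s != nil && s.offset < objEnd; s = *head {
		if s.kind == kindFinalizer || !hasFin {
			*head = s.next // splice out and handle the record
			fmt.Printf("handled special kind=%d at offset %d\n", s.kind, s.offset)
		} else {
			head = &s.next // keep the profile record for the live object
		}
	}
	return hasFin
}

func main() {
	// One 16-byte tiny block carrying a profile record and two finalizers
	// set at different offsets, the situation the commit message describes.
	specials := &special{offset: 0, kind: kindProfile,
		next: &special{offset: 4, kind: kindFinalizer,
			next: &special{offset: 8, kind: kindFinalizer}}}
	hasFin := sweepObjectSpecials(&specials, 16)
	fmt.Println("kept alive:", hasFin, "profile record kept:", specials != nil)
}

The property the fix relies on is visible here: a single sweep handles every finalizer in the tiny block at once, and a profile record is never freed while a finalizer is keeping the block alive.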