var FuncPC = funcPC
+var Fastlog2 = fastlog2
+
type LFNode struct {
Next uint64
Pushcnt uintptr
--- /dev/null
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// fastlog2 implements a fast approximation to the base 2 log of a
+// float64. This is used to compute a geometric distribution for heap
+// sampling, without introducing dependencies on package math. It is a
+// very rough approximation built from the float64 exponent and the
+// first 25 bits of the mantissa: the top 5 bits of the mantissa index
+// a table of precomputed limits and the remaining bits scale linearly
+// between them.
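+// For example, 1.5 has exponent 0 and mantissa bits 10000..., so the
+// table is indexed at 16 and fastlog2(1.5) returns fastlog2Table[16],
+// which matches log2(1.5) (about 0.585).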
+func fastlog2(x float64) float64 {
+ const fastlogScaleBits = 20
+ const fastlogScaleRatio = 1.0 / (1 << fastlogScaleBits)
+
+ xBits := float64bits(x)
+ // Extract the exponent from the IEEE float64, and index the constant
+ // table with the top fastlogNumBits (5) bits of the mantissa.
+ xExp := int64((xBits>>52)&0x7FF) - 1023
+ xManIndex := (xBits >> (52 - fastlogNumBits)) % (1 << fastlogNumBits)
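+ // The next fastlogScaleBits bits of the mantissa are used below to
+ // interpolate linearly between adjacent table entries.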
+ xManScale := (xBits >> (52 - fastlogNumBits - fastlogScaleBits)) % (1 << fastlogScaleBits)
+
+ low, high := fastlog2Table[xManIndex], fastlog2Table[xManIndex+1]
+ return float64(xExp) + low + (high-low)*float64(xManScale)*fastlogScaleRatio
+}
+
+// float64bits returns the IEEE 754 binary representation of f.
+// Adapted from math.Float64bits to avoid a dependency on package math.
+func float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
--- /dev/null
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math"
+ "runtime"
+ "testing"
+)
+
+func TestFastLog2(t *testing.T) {
+ // Compute the Euclidean distance between math.Log2 and the Fastlog2
+ // implementation over the range of interest for heap sampling.
+ const randomBitCount = 26
+ var e float64
+ for i := 1; i < 1<<randomBitCount; i++ {
+ l, fl := math.Log2(float64(i)), runtime.Fastlog2(float64(i))
+ d := l - fl
+ e += d * d
+ }
+ e = math.Sqrt(e)
+
+ if e > 1.0 {
+ t.Fatalf("imprecision on fastlog2 implementation, want <=1.0, got %f", e)
+ }
+}
--- /dev/null
+// AUTO-GENERATED by mkfastlog2table.go
+// Run go generate from src/runtime to update.
+// See mkfastlog2table.go for comments.
+
+package runtime
+
+const fastlogNumBits = 5
+
+var fastlog2Table = [1<<fastlogNumBits + 1]float64{
+ 0,
+ 0.0443941193584535,
+ 0.08746284125033943,
+ 0.12928301694496647,
+ 0.16992500144231248,
+ 0.2094533656289499,
+ 0.24792751344358555,
+ 0.28540221886224837,
+ 0.3219280948873623,
+ 0.3575520046180837,
+ 0.39231742277876036,
+ 0.4262647547020979,
+ 0.4594316186372973,
+ 0.4918530963296748,
+ 0.5235619560570128,
+ 0.5545888516776374,
+ 0.5849625007211563,
+ 0.6147098441152082,
+ 0.6438561897747247,
+ 0.6724253419714956,
+ 0.7004397181410922,
+ 0.7279204545631992,
+ 0.7548875021634686,
+ 0.7813597135246596,
+ 0.8073549220576042,
+ 0.8328900141647417,
+ 0.8579809951275721,
+ 0.8826430493618412,
+ 0.9068905956085185,
+ 0.9307373375628862,
+ 0.9541963103868752,
+ 0.9772799234999164,
+ 1,
+}
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
- c := mp.mcache
- rate := MemProfileRate
- if size < uintptr(rate) {
- // pick next profile time
- // If you change this, also change allocmcache.
- if rate > 0x3fffffff { // make 2*rate not overflow
- rate = 0x3fffffff
- }
- next := int32(fastrand1()) % (2 * int32(rate))
- // Subtract the "remainder" of the current allocation.
- // Otherwise objects that are close in size to sampling rate
- // will be under-sampled, because we consistently discard this remainder.
- next -= (int32(size) - c.next_sample)
- if next < 0 {
- next = 0
- }
- c.next_sample = next
- }
-
+ mp.mcache.next_sample = nextSample()
mProf_Malloc(x, size)
}
+// nextSample returns the next sampling point for heap profiling.
+// It produces a random variable with a geometric distribution and
+// mean MemProfileRate. This is done by generating a uniformly
+// distributed random number and applying the inverse of the
+// cumulative distribution function of an exponential (inverse
+// transform sampling).
+func nextSample() int32 {
+ period := MemProfileRate
+
+ // Make nextSample not overflow. The maximum possible step is
+ // -ln(1/(1<<randomBitCount)) * period, approximately 20 * period.
+ switch {
+ case period > 0x7000000:
+ period = 0x7000000
+ case period == 0:
+ return 0
+ }
+
+ // Let m be the sample rate (1/period); the probability density
+ // function of the exponential is m*exp(-mx), so the CDF is
+ // p = 1 - exp(-mx), so
+ // q = 1 - p == exp(-mx)
+ // log_e(q) = -mx
+ // -log_e(q)/m = x
+ // x = -log_e(q) * period
+ // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency
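+ // Since E[-log_e(q)] = 1 when q is uniform on (0, 1], the expected
+ // sample distance is approximately period.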
+ const randomBitCount = 26
+ q := uint32(fastrand1())%(1<<randomBitCount) + 1
+ qlog := fastlog2(float64(q)) - randomBitCount
+ if qlog > 0 {
+ qlog = 0
+ }
+ const minusLog2 = -0.6931471805599453 // -ln(2)
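+ // qlog and minusLog2 are both non-positive, so the product below is
+ // non-negative and the +1 keeps the returned distance positive.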
+ return int32(qlog*(minusLog2*float64(period))) + 1
+}
+
type persistentAlloc struct {
base unsafe.Pointer
off uintptr
for i := 0; i < _NumSizeClasses; i++ {
c.alloc[i] = &emptymspan
}
-
- // Set first allocation sample size.
- rate := MemProfileRate
- if rate > 0x3fffffff { // make 2*rate not overflow
- rate = 0x3fffffff
- }
- if rate != 0 {
- c.next_sample = int32(int(fastrand1()) % (2 * rate))
- }
-
+ c.next_sample = nextSample()
return c
}
--- /dev/null
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// fastlog2Table contains log2 approximations for 5 binary digits.
+// This is used to implement fastlog2, which is used for heap sampling.
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "math"
+)
+
+func main() {
+ var buf bytes.Buffer
+
+ fmt.Fprintln(&buf, "// AUTO-GENERATED by mkfastlog2table.go")
+ fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
+ fmt.Fprintln(&buf, "// See mkfastlog2table.go for comments.")
+ fmt.Fprintln(&buf)
+ fmt.Fprintln(&buf, "package runtime")
+ fmt.Fprintln(&buf)
+ fmt.Fprintln(&buf, "const fastlogNumBits =", fastlogNumBits)
+ fmt.Fprintln(&buf)
+
+ fmt.Fprintln(&buf, "var fastlog2Table = [1<<fastlogNumBits + 1]float64{")
+ table := computeTable()
+ for _, t := range table {
+ fmt.Fprintf(&buf, "\t%v,\n", t)
+ }
+ fmt.Fprintln(&buf, "}")
+
+ if err := ioutil.WriteFile("fastlog2table.go", buf.Bytes(), 0644); err != nil {
+ log.Fatalln(err)
+ }
+}
+
+const fastlogNumBits = 5
+
+func computeTable() []float64 {
+ fastlog2Table := make([]float64, 1<<fastlogNumBits+1)
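+ // Entry i holds log2(1 + i/2^fastlogNumBits), the interpolation
+ // knots used by fastlog2.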
+ for i := 0; i <= (1 << fastlogNumBits); i++ {
+ fastlog2Table[i] = math.Log2(1.0 + float64(i)/(1<<fastlogNumBits))
+ }
+ return fastlog2Table
+}
//go:generate go run wincallback.go
//go:generate go run mkduff.go
+//go:generate go run mkfastlog2table.go
var ticks struct {
lock mutex
--- /dev/null
+// run
+
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test heap sampling logic.
+
+package main
+
+import (
+ "fmt"
+ "math"
+ "runtime"
+)
+
+var a16 *[16]byte
+var a512 *[512]byte
+var a256 *[256]byte
+var a1k *[1024]byte
+var a64k *[64 * 1024]byte
+
+// This test checks that heap sampling produces reasonable
+// results. Note that heap sampling uses randomization, so the results
+// vary from run to run. This test only checks that the resulting
+// values appear reasonable.
+func main() {
+ const countInterleaved = 10000
+ allocInterleaved(countInterleaved)
+ checkAllocations(getMemProfileRecords(), "main.allocInterleaved", countInterleaved, []int64{256 * 1024, 1024, 256 * 1024, 512, 256 * 1024, 256})
+
+ const count = 100000
+ alloc(count)
+ checkAllocations(getMemProfileRecords(), "main.alloc", count, []int64{1024, 512, 256})
+}
+
+// allocInterleaved stress-tests the heap sampling logic by
+// interleaving large and small allocations.
+func allocInterleaved(n int) {
+ for i := 0; i < n; i++ {
+ // Test verification depends on these lines being contiguous.
+ a64k = new([64 * 1024]byte)
+ a1k = new([1024]byte)
+ a64k = new([64 * 1024]byte)
+ a512 = new([512]byte)
+ a64k = new([64 * 1024]byte)
+ a256 = new([256]byte)
+ }
+}
+
+// alloc performs only small allocations for sanity testing.
+func alloc(n int) {
+ for i := 0; i < n; i++ {
+ // Test verification depends on these lines being contiguous.
+ a1k = new([1024]byte)
+ a512 = new([512]byte)
+ a256 = new([256]byte)
+ }
+}
+
+// checkAllocations validates that the profile records collected for
+// the named function are consistent with count contiguous allocations
+// of the specified sizes.
+func checkAllocations(records []runtime.MemProfileRecord, fname string, count int64, size []int64) {
+ a := allocObjects(records, fname)
+ firstLine := 0
+ for ln := range a {
+ if firstLine == 0 || firstLine > ln {
+ firstLine = ln
+ }
+ }
+ var totalcount int64
+ for i, w := range size {
+ ln := firstLine + i
+ s := a[ln]
+ checkValue(fname, ln, "objects", count, s.objects)
+ checkValue(fname, ln, "bytes", count*w, s.bytes)
+ totalcount += s.objects
+ }
+ // Check the total number of allocations, to ensure some sampling occurred.
+ if totalwant := count * int64(len(size)); totalcount <= 0 || totalcount > totalwant*1024 {
+ panic(fmt.Sprintf("%s want total count > 0 && <= %d, got %d", fname, totalwant*1024, totalcount))
+ }
+}
+
+// checkValue checks an unsampled value against a range.
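+// The tolerance is deliberately wide (0 to 1024x the expected value)
+// so that only gross sampling errors cause a failure.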
+func checkValue(fname string, ln int, name string, want, got int64) {
+ if got < 0 || got > 1024*want {
+ panic(fmt.Sprintf("%s:%d want %s >= 0 && <= %d, got %d", fname, ln, name, 1024*want, got))
+ }
+}
+
+func getMemProfileRecords() []runtime.MemProfileRecord {
+ // Find out how many records there are (MemProfile(nil, true)),
+ // allocate that many records, and get the data.
+ // There's a race—more records might be added between
+ // the two calls—so allocate a few extra records for safety
+ // and also try again if we're very unlucky.
+ // The loop should only execute one iteration in the common case.
+ var p []runtime.MemProfileRecord
+ n, ok := runtime.MemProfile(nil, true)
+ for {
+ // Allocate room for a slightly bigger profile,
+ // in case a few more entries have been added
+ // since the call to MemProfile.
+ p = make([]runtime.MemProfileRecord, n+50)
+ n, ok = runtime.MemProfile(p, true)
+ if ok {
+ p = p[0:n]
+ break
+ }
+ // Profile grew; try again.
+ }
+ return p
+}
+
+type allocStat struct {
+ bytes, objects int64
+}
+
+// allocObjects examines the profile records for the named function
+// and returns the allocation stats aggregated by source line number.
+func allocObjects(records []runtime.MemProfileRecord, function string) map[int]allocStat {
+ a := make(map[int]allocStat)
+ for _, r := range records {
+ for _, s := range r.Stack0 {
+ if s == 0 {
+ break
+ }
+ if f := runtime.FuncForPC(s); f != nil {
+ name := f.Name()
+ _, line := f.FileLine(s)
+ if name == function {
+ allocStat := a[line]
+ allocStat.bytes += r.AllocBytes
+ allocStat.objects += r.AllocObjects
+ a[line] = allocStat
+ }
+ }
+ }
+ }
+ for line, stats := range a {
+ objects, bytes := scaleHeapSample(stats.objects, stats.bytes, int64(runtime.MemProfileRate))
+ a[line] = allocStat{bytes, objects}
+ }
+ return a
+}
+
+// scaleHeapSample unsamples heap allocations.
+// Taken from src/cmd/pprof/internal/profile/legacy_profile.go
+func scaleHeapSample(count, size, rate int64) (int64, int64) {
+ if count == 0 || size == 0 {
+ return 0, 0
+ }
+
+ if rate <= 1 {
+ // if rate==1 all samples were collected so no adjustment is needed.
+ // if rate<1 treat as unknown and skip scaling.
+ return count, size
+ }
+
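+ // With an average of rate bytes between samples, an allocation of
+ // the average size escapes sampling with probability
+ // exp(-avgSize/rate), so each sampled object stands for
+ // 1/(1-exp(-avgSize/rate)) objects.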
+ avgSize := float64(size) / float64(count)
+ scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
+
+ return int64(float64(count) * scale), int64(float64(size) * scale)
+}