From: Michael Anthony Knyszek Date: Tue, 30 Aug 2022 03:13:36 +0000 (+0000) Subject: runtime: shrink time histogram buckets X-Git-Tag: go1.20rc1~1021 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=87eda2a782db9b7ad2ec1fd335ed6c7472aa66bc;p=gostls13.git runtime: shrink time histogram buckets There are lots of useless buckets with too much precision. Introduce a minimum level of precision with a minimum bucket bit. This cuts down on the size of a time histogram dramatically (~3x). Also, pick a smaller sub bucket count; we don't need 6% precision. Also, rename super-buckets to buckets to more closely line up with HDR histogram literature. Change-Id: I199449650e4b34f2a6dca3cf1d8edb071c6655c0 Reviewed-on: https://go-review.googlesource.com/c/go/+/427615 Run-TryBot: Michael Knyszek Auto-Submit: Michael Knyszek TryBot-Result: Gopher Robot Reviewed-by: Michael Pratt --- diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index c29d64a885..93cae48211 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1228,22 +1228,28 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int { } const ( - TimeHistSubBucketBits = timeHistSubBucketBits - TimeHistNumSubBuckets = timeHistNumSubBuckets - TimeHistNumSuperBuckets = timeHistNumSuperBuckets + TimeHistSubBucketBits = timeHistSubBucketBits + TimeHistNumSubBuckets = timeHistNumSubBuckets + TimeHistNumBuckets = timeHistNumBuckets + TimeHistMinBucketBits = timeHistMinBucketBits + TimeHistMaxBucketBits = timeHistMaxBucketBits ) type TimeHistogram timeHistogram // Counts returns the counts for the given bucket, subBucket indices. // Returns true if the bucket was valid, otherwise returns the counts -// for the underflow bucket and false. -func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) { +// for the overflow bucket if bucket > 0 or the underflow bucket if +// bucket < 0, and false. +func (th *TimeHistogram) Count(bucket, subBucket int) (uint64, bool) { t := (*timeHistogram)(th) - i := bucket*TimeHistNumSubBuckets + subBucket - if i >= uint(len(t.counts)) { + if bucket < 0 { return t.underflow.Load(), false } + i := bucket*TimeHistNumSubBuckets + subBucket + if i >= len(t.counts) { + return t.overflow.Load(), false + } return t.counts[i].Load(), true } diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go index d2e6367c84..43dfe61901 100644 --- a/src/runtime/histogram.go +++ b/src/runtime/histogram.go @@ -12,63 +12,77 @@ import ( const ( // For the time histogram type, we use an HDR histogram. - // Values are placed in super-buckets based solely on the most - // significant set bit. Thus, super-buckets are power-of-2 sized. + // Values are placed in buckets based solely on the most + // significant set bit. Thus, buckets are power-of-2 sized. // Values are then placed into sub-buckets based on the value of // the next timeHistSubBucketBits most significant bits. Thus, - // sub-buckets are linear within a super-bucket. + // sub-buckets are linear within a bucket. // // Therefore, the number of sub-buckets (timeHistNumSubBuckets) // defines the error. This error may be computed as // 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets - // per super-bucket the error is approximately 6%. + // per bucket the error is approximately 6%. // - // The number of super-buckets (timeHistNumSuperBuckets), on the - // other hand, defines the range. To reserve room for sub-buckets, - // bit timeHistSubBucketBits is the first bit considered for - // super-buckets, so super-bucket indices are adjusted accordingly. + // The number of buckets (timeHistNumBuckets), on the + // other hand, defines the range. To avoid producing a large number + // of buckets that are close together, especially for small numbers + // (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets + // is defined in terms of the least significant bit (timeHistMinBucketBits) + // that needs to be set before we start bucketing and the most + // significant bit (timeHistMaxBucketBits) that we bucket before we just + // dump it into a catch-all bucket. // - // As an example, consider 45 super-buckets with 16 sub-buckets. + // As an example, consider the configuration: // - // 00110 - // ^---- - // │ ^ - // │ └---- Lowest 4 bits -> sub-bucket 6 - // └------- Bit 4 unset -> super-bucket 0 + // timeHistMinBucketBits = 9 + // timeHistMaxBucketBits = 48 + // timeHistSubBucketBits = 2 // - // 10110 - // ^---- - // │ ^ - // │ └---- Next 4 bits -> sub-bucket 6 - // └------- Bit 4 set -> super-bucket 1 - // 100010 - // ^----^ - // │ ^ └-- Lower bits ignored - // │ └---- Next 4 bits -> sub-bucket 1 - // └------- Bit 5 set -> super-bucket 2 + // Then: // - // Following this pattern, super-bucket 44 will have the bit 47 set. We don't - // have any buckets for higher values, so the highest sub-bucket will - // contain values of 2^48-1 nanoseconds or approx. 3 days. This range is - // more than enough to handle durations produced by the runtime. - timeHistSubBucketBits = 4 - timeHistNumSubBuckets = 1 << timeHistSubBucketBits - timeHistNumSuperBuckets = 45 - timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1 + // 011000001 + // ^-- + // │ ^ + // │ └---- Next 2 bits -> sub-bucket 3 + // └------- Bit 9 unset -> bucket 0 + // + // 110000001 + // ^-- + // │ ^ + // │ └---- Next 2 bits -> sub-bucket 2 + // └------- Bit 9 set -> bucket 1 + // + // 1000000010 + // ^-- ^ + // │ ^ └-- Lower bits ignored + // │ └---- Next 2 bits -> sub-bucket 0 + // └------- Bit 10 set -> bucket 2 + // + // Following this pattern, bucket 38 will have the bit 46 set. We don't + // have any buckets for higher values, so we spill the rest into an overflow + // bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more. + // This range is more than enough to handle durations produced by the runtime. + timeHistMinBucketBits = 9 + timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range. + timeHistSubBucketBits = 2 + timeHistNumSubBuckets = 1 << timeHistSubBucketBits + timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1 + // Two extra buckets, one for underflow, one for overflow. + timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2 ) // timeHistogram represents a distribution of durations in // nanoseconds. // // The accuracy and range of the histogram is defined by the -// timeHistSubBucketBits and timeHistNumSuperBuckets constants. +// timeHistSubBucketBits and timeHistNumBuckets constants. // // It is an HDR histogram with exponentially-distributed // buckets and linearly distributed sub-buckets. // // The histogram is safe for concurrent reads and writes. type timeHistogram struct { - counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]atomic.Uint64 + counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64 // underflow counts all the times we got a negative duration // sample. Because of how time works on some platforms, it's @@ -76,6 +90,10 @@ type timeHistogram struct { // but we record them anyway because it's better to have some // signal that it's happening than just missing samples. underflow atomic.Uint64 + + // overflow counts all the times we got a duration that exceeded + // the range counts represents. + overflow atomic.Uint64 } // record adds the given duration to the distribution. @@ -85,36 +103,35 @@ type timeHistogram struct { // //go:nosplit func (h *timeHistogram) record(duration int64) { + // If the duration is negative, capture that in underflow. if duration < 0 { h.underflow.Add(1) return } - // The index of the exponential bucket is just the index - // of the highest set bit adjusted for how many bits we - // use for the subbucket. Note that it's timeHistSubBucketsBits-1 - // because we use the 0th bucket to hold values < timeHistNumSubBuckets. - var superBucket, subBucket uint - if duration >= timeHistNumSubBuckets { - // At this point, we know the duration value will always be - // at least timeHistSubBucketsBits long. - superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits - if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) { - // The bucket index we got is larger than what we support, so - // include this count in the highest bucket, which extends to - // infinity. - superBucket = timeHistNumSuperBuckets - 1 - subBucket = timeHistNumSubBuckets - 1 - } else { - // The linear subbucket index is just the timeHistSubBucketsBits - // bits after the top bit. To extract that value, shift down - // the duration such that we leave the top bit and the next bits - // intact, then extract the index. - subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets) - } + // bucketBit is the target bit for the bucket which is usually the + // highest 1 bit, but if we're less than the minimum, is the highest + // 1 bit of the minimum (which will be zero in the duration). + // + // bucket is the bucket index, which is the bucketBit minus the + // highest bit of the minimum, plus one to leave room for the catch-all + // bucket for samples lower than the minimum. + var bucketBit, bucket uint + if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits { + bucketBit = timeHistMinBucketBits + bucket = 0 // bucketBit - timeHistMinBucketBits } else { - subBucket = uint(duration) + bucketBit = uint(l) + bucket = bucketBit - timeHistMinBucketBits + 1 + } + // If the bucket we computed is greater than the number of buckets, + // count that in overflow. + if bucket >= timeHistNumBuckets { + h.overflow.Add(1) + return } - h.counts[superBucket*timeHistNumSubBuckets+subBucket].Add(1) + // The sub-bucket index is just next timeHistSubBucketBits after the bucketBit. + subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets + h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1) } const ( @@ -137,33 +154,37 @@ func float64NegInf() float64 { // not nanoseconds like the timeHistogram represents durations. func timeHistogramMetricsBuckets() []float64 { b := make([]float64, timeHistTotalBuckets+1) + // Underflow bucket. b[0] = float64NegInf() - // Super-bucket 0 has no bits above timeHistSubBucketBits - // set, so just iterate over each bucket and assign the - // incrementing bucket. - for i := 0; i < timeHistNumSubBuckets; i++ { - bucketNanos := uint64(i) - b[i+1] = float64(bucketNanos) / 1e9 + + for j := 0; j < timeHistNumSubBuckets; j++ { + // No bucket bit for the first few buckets. Just sub-bucket bits after the + // min bucket bit. + bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits) + // Convert nanoseconds to seconds via a division. + // These values will all be exactly representable by a float64. + b[j+1] = float64(bucketNanos) / 1e9 } - // Generate the rest of the super-buckets. It's easier to reason - // about if we cut out the 0'th bucket, so subtract one since - // we just handled that bucket. - for i := 0; i < timeHistNumSuperBuckets-1; i++ { + // Generate the rest of the buckets. It's easier to reason + // about if we cut out the 0'th bucket. + for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ { for j := 0; j < timeHistNumSubBuckets; j++ { - // Set the super-bucket bit. - bucketNanos := uint64(1) << (i + timeHistSubBucketBits) + // Set the bucket bit. + bucketNanos := uint64(1) << (i - 1) // Set the sub-bucket bits. - bucketNanos |= uint64(j) << i - // The index for this bucket is going to be the (i+1)'th super bucket - // (note that we're starting from zero, but handled the first super-bucket + bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits) + // The index for this bucket is going to be the (i+1)'th bucket + // (note that we're starting from zero, but handled the first bucket // earlier, so we need to compensate), and the j'th sub bucket. // Add 1 because we left space for -Inf. - bucketIndex := (i+1)*timeHistNumSubBuckets + j + 1 + bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1 // Convert nanoseconds to seconds via a division. // These values will all be exactly representable by a float64. b[bucketIndex] = float64(bucketNanos) / 1e9 } } + // Overflow bucket. + b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9 b[len(b)-1] = float64Inf() return b } diff --git a/src/runtime/histogram_test.go b/src/runtime/histogram_test.go index b12b65a41e..5246e86810 100644 --- a/src/runtime/histogram_test.go +++ b/src/runtime/histogram_test.go @@ -20,50 +20,54 @@ func TestTimeHistogram(t *testing.T) { h := &dummyTimeHistogram // Record exactly one sample in each bucket. - for i := 0; i < TimeHistNumSuperBuckets; i++ { - var base int64 - if i > 0 { - base = int64(1) << (i + TimeHistSubBucketBits - 1) + for j := 0; j < TimeHistNumSubBuckets; j++ { + v := int64(j) << (TimeHistMinBucketBits - 1 - TimeHistSubBucketBits) + for k := 0; k < j; k++ { + // Record a number of times equal to the bucket index. + h.Record(v) } + } + for i := TimeHistMinBucketBits; i < TimeHistMaxBucketBits; i++ { + base := int64(1) << (i - 1) for j := 0; j < TimeHistNumSubBuckets; j++ { - v := int64(j) - if i > 0 { - v <<= i - 1 + v := int64(j) << (i - 1 - TimeHistSubBucketBits) + for k := 0; k < (i+1-TimeHistMinBucketBits)*TimeHistNumSubBuckets+j; k++ { + // Record a number of times equal to the bucket index. + h.Record(base + v) } - h.Record(base + v) } } - // Hit the underflow bucket. + // Hit the underflow and overflow buckets. h.Record(int64(-1)) + h.Record(math.MaxInt64) + h.Record(math.MaxInt64) // Check to make sure there's exactly one count in each // bucket. - for i := uint(0); i < TimeHistNumSuperBuckets; i++ { - for j := uint(0); j < TimeHistNumSubBuckets; j++ { + for i := 0; i < TimeHistNumBuckets; i++ { + for j := 0; j < TimeHistNumSubBuckets; j++ { c, ok := h.Count(i, j) if !ok { - t.Errorf("hit underflow bucket unexpectedly: (%d, %d)", i, j) - } else if c != 1 { - t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c) + t.Errorf("unexpected invalid bucket: (%d, %d)", i, j) + } else if idx := uint64(i*TimeHistNumSubBuckets + j); c != idx { + t.Errorf("bucket (%d, %d) has count that is not %d: %d", i, j, idx, c) } } } - c, ok := h.Count(TimeHistNumSuperBuckets, 0) + c, ok := h.Count(-1, 0) if ok { - t.Errorf("expected to hit underflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0) + t.Errorf("expected to hit underflow bucket: (%d, %d)", -1, 0) } if c != 1 { - t.Errorf("underflow bucket has count that is not 1: %d", c) + t.Errorf("overflow bucket has count that is not 1: %d", c) } - // Check overflow behavior. - // By hitting a high value, we should just be adding into the highest bucket. - h.Record(math.MaxInt64) - c, ok = h.Count(TimeHistNumSuperBuckets-1, TimeHistNumSubBuckets-1) - if !ok { - t.Error("hit underflow bucket in highest bucket unexpectedly") - } else if c != 2 { - t.Errorf("highest has count that is not 2: %d", c) + c, ok = h.Count(TimeHistNumBuckets+1, 0) + if ok { + t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumBuckets+1, 0) + } + if c != 2 { + t.Errorf("overflow bucket has count that is not 2: %d", c) } dummyTimeHistogram = TimeHistogram{} @@ -72,34 +76,32 @@ func TestTimeHistogram(t *testing.T) { func TestTimeHistogramMetricsBuckets(t *testing.T) { buckets := TimeHistogramMetricsBuckets() - nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumSuperBuckets - expBucketsLen := nonInfBucketsLen + 2 // Count -Inf and +Inf. + nonInfBucketsLen := TimeHistNumSubBuckets * TimeHistNumBuckets + expBucketsLen := nonInfBucketsLen + 3 // Count -Inf, the edge for the overflow bucket, and +Inf. if len(buckets) != expBucketsLen { t.Fatalf("unexpected length of buckets: got %d, want %d", len(buckets), expBucketsLen) } - // Check the first non-Inf 2*TimeHistNumSubBuckets buckets in order, skipping the - // first bucket which should be -Inf (checked later). - // - // Because of the way this scheme works, the bottom TimeHistNumSubBuckets - // buckets are fully populated, and then the next TimeHistNumSubBuckets - // have the TimeHistSubBucketBits'th bit set, while the bottom are once - // again fully populated. - for i := 1; i <= 2*TimeHistNumSubBuckets+1; i++ { - if got, want := buckets[i], float64(i-1)/1e9; got != want { - t.Errorf("expected bucket %d to have value %e, got %e", i, want, got) - } - } // Check some values. idxToBucket := map[int]float64{ 0: math.Inf(-1), - 33: float64(0x10<<1) / 1e9, - 34: float64(0x11<<1) / 1e9, - 49: float64(0x10<<2) / 1e9, - 58: float64(0x19<<2) / 1e9, - 65: float64(0x10<<3) / 1e9, - 513: float64(0x10<<31) / 1e9, - 519: float64(0x16<<31) / 1e9, - expBucketsLen - 2: float64(0x1f<<43) / 1e9, + 1: 0.0, + 2: float64(0x040) / 1e9, + 3: float64(0x080) / 1e9, + 4: float64(0x0c0) / 1e9, + 5: float64(0x100) / 1e9, + 6: float64(0x140) / 1e9, + 7: float64(0x180) / 1e9, + 8: float64(0x1c0) / 1e9, + 9: float64(0x200) / 1e9, + 10: float64(0x280) / 1e9, + 11: float64(0x300) / 1e9, + 12: float64(0x380) / 1e9, + 13: float64(0x400) / 1e9, + 15: float64(0x600) / 1e9, + 81: float64(0x8000000) / 1e9, + 82: float64(0xa000000) / 1e9, + 108: float64(0x380000000) / 1e9, + expBucketsLen - 2: float64(0x1<<47) / 1e9, expBucketsLen - 1: math.Inf(1), } for idx, bucket := range idxToBucket { diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 313850a3a0..2271d8084d 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -200,6 +200,7 @@ func initMetrics() { for i := range memstats.gcPauseDist.counts { hist.counts[i+1] = memstats.gcPauseDist.counts[i].Load() } + hist.counts[len(hist.counts)-1] = memstats.gcPauseDist.overflow.Load() }, }, "/gc/stack/starting-size:bytes": { @@ -330,6 +331,7 @@ func initMetrics() { for i := range sched.timeToRun.counts { hist.counts[i+1] = sched.timeToRun.counts[i].Load() } + hist.counts[len(hist.counts)-1] = sched.timeToRun.overflow.Load() }, }, }