From: Austin Clements Date: Fri, 28 Jul 2017 20:26:51 +0000 (-0400) Subject: internal/trace: add "per-P" MMU analysis X-Git-Tag: go1.12beta1~490 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=bef4efc822794ea2e7310756bc546bf6930fc066;p=gostls13.git internal/trace: add "per-P" MMU analysis The current MMU analysis considers all Ps together, so if, for example, one of four Ps is blocked, mutator utilization is 75%. However, this is less useful for understanding the impact on individual goroutines because that one blocked goroutine could be blocked for a very long time, but we still appear to have good utilization. Hence, this introduces a new flag that does a "per-P" analysis where the utilization of each P is considered independently. The MMU is then the combination of the MMU for each P's utilization function. Change-Id: Id67b980d4d82b511d28300cdf92ccbb5ae8f0c78 Reviewed-on: https://go-review.googlesource.com/c/60797 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index 2a07be4ba2..d3b6768686 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -27,12 +27,12 @@ func init() { var mmuCache struct { init sync.Once - util []trace.MutatorUtil + util [][]trace.MutatorUtil mmuCurve *trace.MMUCurve err error } -func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) { +func getMMUCurve() ([][]trace.MutatorUtil, *trace.MMUCurve, error) { mmuCache.init.Do(func() { tr, err := parseTrace() if err != nil { @@ -69,7 +69,16 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) { // Cover six orders of magnitude. xMax := xMin * 1e6 // But no more than the length of the trace. - if maxMax := time.Duration(mu[len(mu)-1].Time - mu[0].Time); xMax > maxMax { + minEvent, maxEvent := mu[0][0].Time, mu[0][len(mu[0])-1].Time + for _, mu1 := range mu[1:] { + if mu1[0].Time < minEvent { + minEvent = mu1[0].Time + } + if mu1[len(mu1)-1].Time > maxEvent { + maxEvent = mu1[len(mu1)-1].Time + } + } + if maxMax := time.Duration(maxEvent - minEvent); xMax > maxMax { xMax = maxMax } // Compute MMU curve. diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index ab0c640e26..569ab86b82 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -36,27 +36,64 @@ const ( UtilAssist // UtilSweep means utilization should account for sweeping. UtilSweep + + // UtilPerProc means each P should be given a separate + // utilization function. Otherwise, there is a single function + // and each P is given a fraction of the utilization. + UtilPerProc ) -// MutatorUtilization returns the mutator utilization function for the -// given trace. This function will always end with 0 utilization. The -// bounds of the function are implicit in the first and last event; -// outside of these bounds the function is undefined. -func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { +// MutatorUtilization returns a set of mutator utilization functions +// for the given trace. Each function will always end with 0 +// utilization. The bounds of each function are implicit in the first +// and last event; outside of these bounds each function is undefined. +// +// If the UtilPerProc flag is not given, this always returns a single +// utilization function. Otherwise, it returns one function per P. +func (p *Parsed) MutatorUtilization(flags UtilFlags) [][]MutatorUtil { events := p.Events if len(events) == 0 { return nil } - gomaxprocs, gcPs, stw := 1, 0, 0 - out := []MutatorUtil{{events[0].Ts, 1}} + type perP struct { + // gc > 0 indicates that GC is active on this P. + gc int + // series the logical series number for this P. This + // is necessary because Ps may be removed and then + // re-added, and then the new P needs a new series. + series int + } + ps := []perP{} + stw := 0 + + out := [][]MutatorUtil{} assists := map[uint64]bool{} block := map[uint64]*Event{} bgMark := map[uint64]bool{} + for _, ev := range events { switch ev.Type { case EvGomaxprocs: - gomaxprocs = int(ev.Args[0]) + gomaxprocs := int(ev.Args[0]) + if len(ps) > gomaxprocs { + if flags&UtilPerProc != 0 { + // End each P's series. + for _, p := range ps[gomaxprocs:] { + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0}) + } + } + ps = ps[:gomaxprocs] + } + for len(ps) < gomaxprocs { + // Start new P's series. + series := 0 + if flags&UtilPerProc != 0 || len(out) == 0 { + series = len(out) + out = append(out, []MutatorUtil{{ev.Ts, 1}}) + } + ps = append(ps, perP{series: series}) + } case EvGCSTWStart: if flags&UtilSTW != 0 { stw++ @@ -67,33 +104,41 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { } case EvGCMarkAssistStart: if flags&UtilAssist != 0 { - gcPs++ + ps[ev.P].gc++ assists[ev.G] = true } case EvGCMarkAssistDone: if flags&UtilAssist != 0 { - gcPs-- + ps[ev.P].gc-- delete(assists, ev.G) } case EvGCSweepStart: if flags&UtilSweep != 0 { - gcPs++ + ps[ev.P].gc++ } case EvGCSweepDone: if flags&UtilSweep != 0 { - gcPs-- + ps[ev.P].gc-- } case EvGoStartLabel: if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { // Background mark worker. - bgMark[ev.G] = true - gcPs++ + // + // If we're in per-proc mode, we don't + // count dedicated workers because + // they kick all of the goroutines off + // that P, so don't directly + // contribute to goroutine latency. + if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") { + bgMark[ev.G] = true + ps[ev.P].gc++ + } } fallthrough case EvGoStart: if assists[ev.G] { // Unblocked during assist. - gcPs++ + ps[ev.P].gc++ } block[ev.G] = ev.Link default: @@ -103,49 +148,77 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { if assists[ev.G] { // Blocked during assist. - gcPs-- + ps[ev.P].gc-- } if bgMark[ev.G] { // Background mark worker done. - gcPs-- + ps[ev.P].gc-- delete(bgMark, ev.G) } delete(block, ev.G) } - ps := gcPs - if stw > 0 { - ps = gomaxprocs - } - mu := MutatorUtil{ev.Ts, 1 - float64(ps)/float64(gomaxprocs)} - if mu.Util == out[len(out)-1].Util { - // No change. - continue - } - if mu.Time == out[len(out)-1].Time { - // Take the lowest utilization at a time stamp. - if mu.Util < out[len(out)-1].Util { - out[len(out)-1] = mu + if flags&UtilPerProc == 0 { + // Compute the current average utilization. + if len(ps) == 0 { + continue } + gcPs := 0 + if stw > 0 { + gcPs = len(ps) + } else { + for i := range ps { + if ps[i].gc > 0 { + gcPs++ + } + } + } + mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))} + + // Record the utilization change. (Since + // len(ps) == len(out), we know len(out) > 0.) + out[0] = addUtil(out[0], mu) } else { - out = append(out, mu) + // Check for per-P utilization changes. + for i := range ps { + p := &ps[i] + util := 1.0 + if stw > 0 || p.gc > 0 { + util = 0.0 + } + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util}) + } } } - // Add final 0 utilization event. This is important to mark - // the end of the trace. The exact value shouldn't matter - // since no window should extend beyond this, but using 0 is - // symmetric with the start of the trace. - endTime := events[len(events)-1].Ts - if out[len(out)-1].Time == endTime { - out[len(out)-1].Util = 0 - } else { - out = append(out, MutatorUtil{endTime, 0}) + // Add final 0 utilization event to any remaining series. This + // is important to mark the end of the trace. The exact value + // shouldn't matter since no window should extend beyond this, + // but using 0 is symmetric with the start of the trace. + mu := MutatorUtil{events[len(events)-1].Ts, 0} + for i := range ps { + out[ps[i].series] = addUtil(out[ps[i].series], mu) } - return out } +func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil { + if len(util) > 0 { + if mu.Util == util[len(util)-1].Util { + // No change. + return util + } + if mu.Time == util[len(util)-1].Time { + // Take the lowest utilization at a time stamp. + if mu.Util < util[len(util)-1].Util { + util[len(util)-1] = mu + } + return util + } + } + return append(util, mu) +} + // totalUtil is total utilization, measured in nanoseconds. This is a // separate type primarily to distinguish it from mean utilization, // which is also a float64. @@ -163,6 +236,10 @@ func (u totalUtil) mean(dur time.Duration) float64 { // An MMUCurve is the minimum mutator utilization curve across // multiple window sizes. type MMUCurve struct { + series []mmuSeries +} + +type mmuSeries struct { util []MutatorUtil // sums[j] is the cumulative sum of util[:j]. sums []totalUtil @@ -188,7 +265,15 @@ type mmuBand struct { // NewMMUCurve returns an MMU curve for the given mutator utilization // function. -func NewMMUCurve(util []MutatorUtil) *MMUCurve { +func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve { + series := make([]mmuSeries, len(utils)) + for i, util := range utils { + series[i] = newMMUSeries(util) + } + return &MMUCurve{series} +} + +func newMMUSeries(util []MutatorUtil) mmuSeries { // Compute cumulative sum. sums := make([]totalUtil, len(util)) var prev MutatorUtil @@ -218,10 +303,10 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { // Compute the bands. There are numBands+1 bands in order to // record the final cumulative sum. bands := make([]mmuBand, numBands+1) - c := MMUCurve{util, sums, bands, bandDur} - leftSum := integrator{&c, 0} + s := mmuSeries{util, sums, bands, bandDur} + leftSum := integrator{&s, 0} for i := range bands { - startTime, endTime := c.bandTime(i) + startTime, endTime := s.bandTime(i) cumUtil := leftSum.advance(startTime) predIdx := leftSum.pos minUtil := 1.0 @@ -231,16 +316,18 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { bands[i] = mmuBand{minUtil, cumUtil, leftSum} } - return &c + return s } -func (c *MMUCurve) bandTime(i int) (start, end int64) { - start = int64(i)*c.bandDur + c.util[0].Time - end = start + c.bandDur +func (s *mmuSeries) bandTime(i int) (start, end int64) { + start = int64(i)*s.bandDur + s.util[0].Time + end = start + s.bandDur return } type bandUtil struct { + // Utilization series index + series int // Band index i int // Lower bound of mutator utilization for all windows @@ -402,12 +489,22 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { acc.mmu = 0 return } - util := c.util - if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { - window = max - } - bandU := bandUtilHeap(c.mkBandUtil(window)) + var bandU bandUtilHeap + windows := make([]time.Duration, len(c.series)) + for i, s := range c.series { + windows[i] = window + if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max { + windows[i] = max + } + + bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i])) + if bandU == nil { + bandU = bandU1 + } else { + bandU = append(bandU, bandU1...) + } + } // Process bands from lowest utilization bound to highest. heap.Init(&bandU) @@ -416,12 +513,13 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { // refining the next lowest band can no longer affect the MMU // or windows. for len(bandU) > 0 && bandU[0].utilBound < acc.bound { - c.bandMMU(bandU[0].i, window, acc) + i := bandU[0].series + c.series[i].bandMMU(bandU[0].i, windows[i], acc) heap.Pop(&bandU) } } -func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { +func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil { // For each band, compute the worst-possible total mutator // utilization for all windows that start in that band. @@ -477,7 +575,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil } - bandU[i] = bandUtil{i, util.mean(window)} + bandU[i] = bandUtil{series, i, util.mean(window)} } return bandU @@ -485,7 +583,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { // bandMMU computes the precise minimum mutator utilization for // windows with a left edge in band bandIdx. -func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { +func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { util := c.util // We think of the mutator utilization over time as the @@ -542,7 +640,7 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) // An integrator tracks a position in a utilization function and // integrates it. type integrator struct { - u *MMUCurve + u *mmuSeries // pos is the index in u.util of the current time's non-strict // predecessor. pos int diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go index f1416fa9f9..b438a2931f 100644 --- a/src/internal/traceparser/gc_test.go +++ b/src/internal/traceparser/gc_test.go @@ -29,14 +29,14 @@ func TestMMU(t *testing.T) { // 0.5 * * * * // 0.0 ***** ***** // 0 1 2 3 4 5 - util := []MutatorUtil{ + util := [][]MutatorUtil{{ {0e9, 1}, {1e9, 0}, {2e9, 1}, {3e9, 0}, {4e9, 1}, {5e9, 0}, - } + }} mmuCurve := NewMMUCurve(util) for _, test := range []struct { @@ -90,7 +90,7 @@ func TestMMUTrace(t *testing.T) { // Test the optimized implementation against the "obviously // correct" implementation. for window := time.Nanosecond; window < 10*time.Second; window *= 10 { - want := mmuSlow(mu, window) + want := mmuSlow(mu[0], window) got := mmuCurve.MMU(window) if !aeq(want, got) { t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window)