internal/trace: add "per-P" MMU analysis

author Austin Clements <austin@google.com>

Fri, 28 Jul 2017 20:26:51 +0000 (16:26 -0400)

committer Austin Clements <austin@google.com>

Mon, 5 Nov 2018 19:10:23 +0000 (19:10 +0000)
author Austin Clements <austin@google.com>
Fri, 28 Jul 2017 20:26:51 +0000 (16:26 -0400)
committer Austin Clements <austin@google.com>
Mon, 5 Nov 2018 19:10:23 +0000 (19:10 +0000)
diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go

index 2a07be4ba2d1f12d642bf0904cece10b74bb806a..d3b6768686a2d8663b6699bae102239387eaab38 100644 (file)
--- a/src/cmd/trace/mmu.go
+++ b/src/cmd/trace/mmu.go
@@ -27,12 +27,12 @@ func init() {
  
  var mmuCache struct {
         init     sync.Once
-       util     []trace.MutatorUtil
+       util     [][]trace.MutatorUtil
         mmuCurve *trace.MMUCurve
         err      error
  }
  
-func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) {
+func getMMUCurve() ([][]trace.MutatorUtil, *trace.MMUCurve, error) {
         mmuCache.init.Do(func() {
                 tr, err := parseTrace()
                 if err != nil {
@@ -69,7 +69,16 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) {
         // Cover six orders of magnitude.
         xMax := xMin * 1e6
         // But no more than the length of the trace.
-       if maxMax := time.Duration(mu[len(mu)-1].Time - mu[0].Time); xMax > maxMax {
+       minEvent, maxEvent := mu[0][0].Time, mu[0][len(mu[0])-1].Time
+       for _, mu1 := range mu[1:] {
+               if mu1[0].Time < minEvent {
+                       minEvent = mu1[0].Time
+               }
+               if mu1[len(mu1)-1].Time > maxEvent {
+                       maxEvent = mu1[len(mu1)-1].Time
+               }
+       }
+       if maxMax := time.Duration(maxEvent - minEvent); xMax > maxMax {
                 xMax = maxMax
         }
         // Compute MMU curve.
diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go

index ab0c640e26cb016d279bcc056715821f5c8bd90c..569ab86b826bacf575aed9edf9bada31b7c8b64f 100644 (file)
--- a/src/internal/traceparser/gc.go
+++ b/src/internal/traceparser/gc.go
@@ -36,27 +36,64 @@ const (
         UtilAssist
         // UtilSweep means utilization should account for sweeping.
         UtilSweep
+
+       // UtilPerProc means each P should be given a separate
+       // utilization function. Otherwise, there is a single function
+       // and each P is given a fraction of the utilization.
+       UtilPerProc
  )
  
-// MutatorUtilization returns the mutator utilization function for the
-// given trace. This function will always end with 0 utilization. The
-// bounds of the function are implicit in the first and last event;
-// outside of these bounds the function is undefined.
-func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil {
+// MutatorUtilization returns a set of mutator utilization functions
+// for the given trace. Each function will always end with 0
+// utilization. The bounds of each function are implicit in the first
+// and last event; outside of these bounds each function is undefined.
+//
+// If the UtilPerProc flag is not given, this always returns a single
+// utilization function. Otherwise, it returns one function per P.
+func (p *Parsed) MutatorUtilization(flags UtilFlags) [][]MutatorUtil {
         events := p.Events
         if len(events) == 0 {
                 return nil
         }
  
-       gomaxprocs, gcPs, stw := 1, 0, 0
-       out := []MutatorUtil{{events[0].Ts, 1}}
+       type perP struct {
+               // gc > 0 indicates that GC is active on this P.
+               gc int
+               // series the logical series number for this P. This
+               // is necessary because Ps may be removed and then
+               // re-added, and then the new P needs a new series.
+               series int
+       }
+       ps := []perP{}
+       stw := 0
+
+       out := [][]MutatorUtil{}
         assists := map[uint64]bool{}
         block := map[uint64]*Event{}
         bgMark := map[uint64]bool{}
+
         for _, ev := range events {
                 switch ev.Type {
                 case EvGomaxprocs:
-                       gomaxprocs = int(ev.Args[0])
+                       gomaxprocs := int(ev.Args[0])
+                       if len(ps) > gomaxprocs {
+                               if flags&UtilPerProc != 0 {
+                                       // End each P's series.
+                                       for _, p := range ps[gomaxprocs:] {
+                                               out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
+                                       }
+                               }
+                               ps = ps[:gomaxprocs]
+                       }
+                       for len(ps) < gomaxprocs {
+                               // Start new P's series.
+                               series := 0
+                               if flags&UtilPerProc != 0 || len(out) == 0 {
+                                       series = len(out)
+                                       out = append(out, []MutatorUtil{{ev.Ts, 1}})
+                               }
+                               ps = append(ps, perP{series: series})
+                       }
                 case EvGCSTWStart:
                         if flags&UtilSTW != 0 {
                                 stw++
@@ -67,33 +104,41 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil {
                         }
                 case EvGCMarkAssistStart:
                         if flags&UtilAssist != 0 {
-                               gcPs++
+                               ps[ev.P].gc++
                                 assists[ev.G] = true
                         }
                 case EvGCMarkAssistDone:
                         if flags&UtilAssist != 0 {
-                               gcPs--
+                               ps[ev.P].gc--
                                 delete(assists, ev.G)
                         }
                 case EvGCSweepStart:
                         if flags&UtilSweep != 0 {
-                               gcPs++
+                               ps[ev.P].gc++
                         }
                 case EvGCSweepDone:
                         if flags&UtilSweep != 0 {
-                               gcPs--
+                               ps[ev.P].gc--
                         }
                 case EvGoStartLabel:
                         if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
                                 // Background mark worker.
-                               bgMark[ev.G] = true
-                               gcPs++
+                               //
+                               // If we're in per-proc mode, we don't
+                               // count dedicated workers because
+                               // they kick all of the goroutines off
+                               // that P, so don't directly
+                               // contribute to goroutine latency.
+                               if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
+                                       bgMark[ev.G] = true
+                                       ps[ev.P].gc++
+                               }
                         }
                         fallthrough
                 case EvGoStart:
                         if assists[ev.G] {
                                 // Unblocked during assist.
-                               gcPs++
+                               ps[ev.P].gc++
                         }
                         block[ev.G] = ev.Link
                 default:
@@ -103,49 +148,77 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil {
  
                         if assists[ev.G] {
                                 // Blocked during assist.
-                               gcPs--
+                               ps[ev.P].gc--
                         }
                         if bgMark[ev.G] {
                                 // Background mark worker done.
-                               gcPs--
+                               ps[ev.P].gc--
                                 delete(bgMark, ev.G)
                         }
                         delete(block, ev.G)
                 }
  
-               ps := gcPs
-               if stw > 0 {
-                       ps = gomaxprocs
-               }
-               mu := MutatorUtil{ev.Ts, 1 - float64(ps)/float64(gomaxprocs)}
-               if mu.Util == out[len(out)-1].Util {
-                       // No change.
-                       continue
-               }
-               if mu.Time == out[len(out)-1].Time {
-                       // Take the lowest utilization at a time stamp.
-                       if mu.Util < out[len(out)-1].Util {
-                               out[len(out)-1] = mu
+               if flags&UtilPerProc == 0 {
+                       // Compute the current average utilization.
+                       if len(ps) == 0 {
+                               continue
                         }
+                       gcPs := 0
+                       if stw > 0 {
+                               gcPs = len(ps)
+                       } else {
+                               for i := range ps {
+                                       if ps[i].gc > 0 {
+                                               gcPs++
+                                       }
+                               }
+                       }
+                       mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
+
+                       // Record the utilization change. (Since
+                       // len(ps) == len(out), we know len(out) > 0.)
+                       out[0] = addUtil(out[0], mu)
                 } else {
-                       out = append(out, mu)
+                       // Check for per-P utilization changes.
+                       for i := range ps {
+                               p := &ps[i]
+                               util := 1.0
+                               if stw > 0 || p.gc > 0 {
+                                       util = 0.0
+                               }
+                               out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
+                       }
                 }
         }
  
-       // Add final 0 utilization event. This is important to mark
-       // the end of the trace. The exact value shouldn't matter
-       // since no window should extend beyond this, but using 0 is
-       // symmetric with the start of the trace.
-       endTime := events[len(events)-1].Ts
-       if out[len(out)-1].Time == endTime {
-               out[len(out)-1].Util = 0
-       } else {
-               out = append(out, MutatorUtil{endTime, 0})
+       // Add final 0 utilization event to any remaining series. This
+       // is important to mark the end of the trace. The exact value
+       // shouldn't matter since no window should extend beyond this,
+       // but using 0 is symmetric with the start of the trace.
+       mu := MutatorUtil{events[len(events)-1].Ts, 0}
+       for i := range ps {
+               out[ps[i].series] = addUtil(out[ps[i].series], mu)
         }
-
         return out
  }
  
+func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
+       if len(util) > 0 {
+               if mu.Util == util[len(util)-1].Util {
+                       // No change.
+                       return util
+               }
+               if mu.Time == util[len(util)-1].Time {
+                       // Take the lowest utilization at a time stamp.
+                       if mu.Util < util[len(util)-1].Util {
+                               util[len(util)-1] = mu
+                       }
+                       return util
+               }
+       }
+       return append(util, mu)
+}
+
  // totalUtil is total utilization, measured in nanoseconds. This is a
  // separate type primarily to distinguish it from mean utilization,
  // which is also a float64.
@@ -163,6 +236,10 @@ func (u totalUtil) mean(dur time.Duration) float64 {
  // An MMUCurve is the minimum mutator utilization curve across
  // multiple window sizes.
  type MMUCurve struct {
+       series []mmuSeries
+}
+
+type mmuSeries struct {
         util []MutatorUtil
         // sums[j] is the cumulative sum of util[:j].
         sums []totalUtil
@@ -188,7 +265,15 @@ type mmuBand struct {
  
  // NewMMUCurve returns an MMU curve for the given mutator utilization
  // function.
-func NewMMUCurve(util []MutatorUtil) *MMUCurve {
+func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
+       series := make([]mmuSeries, len(utils))
+       for i, util := range utils {
+               series[i] = newMMUSeries(util)
+       }
+       return &MMUCurve{series}
+}
+
+func newMMUSeries(util []MutatorUtil) mmuSeries {
         // Compute cumulative sum.
         sums := make([]totalUtil, len(util))
         var prev MutatorUtil
@@ -218,10 +303,10 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve {
         // Compute the bands. There are numBands+1 bands in order to
         // record the final cumulative sum.
         bands := make([]mmuBand, numBands+1)
-       c := MMUCurve{util, sums, bands, bandDur}
-       leftSum := integrator{&c, 0}
+       s := mmuSeries{util, sums, bands, bandDur}
+       leftSum := integrator{&s, 0}
         for i := range bands {
-               startTime, endTime := c.bandTime(i)
+               startTime, endTime := s.bandTime(i)
                 cumUtil := leftSum.advance(startTime)
                 predIdx := leftSum.pos
                 minUtil := 1.0
@@ -231,16 +316,18 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve {
                 bands[i] = mmuBand{minUtil, cumUtil, leftSum}
         }
  
-       return &c
+       return s
  }
  
-func (c *MMUCurve) bandTime(i int) (start, end int64) {
-       start = int64(i)*c.bandDur + c.util[0].Time
-       end = start + c.bandDur
+func (s *mmuSeries) bandTime(i int) (start, end int64) {
+       start = int64(i)*s.bandDur + s.util[0].Time
+       end = start + s.bandDur
         return
  }
  
  type bandUtil struct {
+       // Utilization series index
+       series int
         // Band index
         i int
         // Lower bound of mutator utilization for all windows
@@ -402,12 +489,22 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
                 acc.mmu = 0
                 return
         }
-       util := c.util
-       if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max {
-               window = max
-       }
  
-       bandU := bandUtilHeap(c.mkBandUtil(window))
+       var bandU bandUtilHeap
+       windows := make([]time.Duration, len(c.series))
+       for i, s := range c.series {
+               windows[i] = window
+               if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
+                       windows[i] = max
+               }
+
+               bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
+               if bandU == nil {
+                       bandU = bandU1
+               } else {
+                       bandU = append(bandU, bandU1...)
+               }
+       }
  
         // Process bands from lowest utilization bound to highest.
         heap.Init(&bandU)
@@ -416,12 +513,13 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
         // refining the next lowest band can no longer affect the MMU
         // or windows.
         for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
-               c.bandMMU(bandU[0].i, window, acc)
+               i := bandU[0].series
+               c.series[i].bandMMU(bandU[0].i, windows[i], acc)
                 heap.Pop(&bandU)
         }
  }
  
-func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil {
+func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
         // For each band, compute the worst-possible total mutator
         // utilization for all windows that start in that band.
  
@@ -477,7 +575,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil {
                         util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
                 }
  
-               bandU[i] = bandUtil{i, util.mean(window)}
+               bandU[i] = bandUtil{series, i, util.mean(window)}
         }
  
         return bandU
@@ -485,7 +583,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil {
  
  // bandMMU computes the precise minimum mutator utilization for
  // windows with a left edge in band bandIdx.
-func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
+func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
         util := c.util
  
         // We think of the mutator utilization over time as the
@@ -542,7 +640,7 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator)
  // An integrator tracks a position in a utilization function and
  // integrates it.
  type integrator struct {
-       u *MMUCurve
+       u *mmuSeries
         // pos is the index in u.util of the current time's non-strict
         // predecessor.
         pos int
diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go

index f1416fa9f92ee225381a7c6a0596a97fd01f8345..b438a2931f0dc2dd62be37e2ad0d7b54cb8b3d21 100644 (file)
--- a/src/internal/traceparser/gc_test.go
+++ b/src/internal/traceparser/gc_test.go
@@ -29,14 +29,14 @@ func TestMMU(t *testing.T) {
         // 0.5      *   *   *   *
         // 0.0      *****   *****
         //      0   1   2   3   4   5
-       util := []MutatorUtil{
+       util := [][]MutatorUtil{{
                 {0e9, 1},
                 {1e9, 0},
                 {2e9, 1},
                 {3e9, 0},
                 {4e9, 1},
                 {5e9, 0},
-       }
+       }}
         mmuCurve := NewMMUCurve(util)
  
         for _, test := range []struct {
@@ -90,7 +90,7 @@ func TestMMUTrace(t *testing.T) {
         // Test the optimized implementation against the "obviously
         // correct" implementation.
         for window := time.Nanosecond; window < 10*time.Second; window *= 10 {
-               want := mmuSlow(mu, window)
+               want := mmuSlow(mu[0], window)
                 got := mmuCurve.MMU(window)
                 if !aeq(want, got) {
                         t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window)
author	Austin Clements <austin@google.com>
	Fri, 28 Jul 2017 20:26:51 +0000 (16:26 -0400)
committer	Austin Clements <austin@google.com>
	Mon, 5 Nov 2018 19:10:23 +0000 (19:10 +0000)
src/cmd/trace/mmu.go		patch \| blob \| history
src/internal/traceparser/gc.go		patch \| blob \| history
src/internal/traceparser/gc_test.go		patch \| blob \| history