From: Michael Anthony Knyszek Date: Tue, 14 Nov 2023 03:38:04 +0000 (+0000) Subject: cmd/trace/v2: add support for task and region endpoints X-Git-Tag: go1.22rc1~190 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=71052169be2d4de14f2e93ea6ba9811b54434bc3;p=gostls13.git cmd/trace/v2: add support for task and region endpoints This change fills out the last of cmd/trace's subpages for v2 traces by adding support for task and region endpoints. For #60773. For #63960. Change-Id: Ifc4c660514b3904788785a1b20e3abc3bb9e55f1 Reviewed-on: https://go-review.googlesource.com/c/go/+/542077 Reviewed-by: Michael Pratt Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/trace/v2/goroutines.go b/src/cmd/trace/v2/goroutines.go index 5c300024ee..44febeba88 100644 --- a/src/cmd/trace/v2/goroutines.go +++ b/src/cmd/trace/v2/goroutines.go @@ -67,7 +67,7 @@ var templGoroutines = template.Must(template.New("").Parse(` + +

Regions

+ +Below is a table containing a summary of all the user-defined regions in the trace. +Regions are grouped by the region type and the point at which the region started. +The rightmost column of the table contains a latency histogram for each region group. +Note that this histogram only counts regions that began and ended within the traced +period. +However, the "Count" column includes all regions, including those that only started +or ended during the traced period. +Regions that were active through the trace period were not recorded, and so are not +accounted for at all. +Click on the links to explore a breakdown of time spent for each region by goroutine +and user-defined task. +
+
+ + + + + + + +{{range $}} + + + + + +{{end}} +
Region typeCountDuration distribution (complete tasks)
{{printf "%q" .Type}}
{{.Frame.Func}} @ {{printf "0x%x" .Frame.PC}}
{{.Frame.File}}:{{.Frame.Line}}
{{.Histogram.Count}}{{.Histogram.ToHTML (.UserRegionURL)}}
+ + +`)) + +// UserRegionHandlerFunc returns a HandlerFunc that presents the details of the selected regions. +func UserRegionHandlerFunc(t *parsedTrace) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Construct the filter from the request. + filter, err := newRegionFilter(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Collect all the regions with their goroutines. + type region struct { + *trace.UserRegionSummary + Goroutine tracev2.GoID + NonOverlappingStats map[string]time.Duration + HasRangeTime bool + } + var regions []region + var maxTotal time.Duration + validNonOverlappingStats := make(map[string]struct{}) + validRangeStats := make(map[string]struct{}) + for _, g := range t.summary.Goroutines { + for _, r := range g.Regions { + if !filter.match(t, r) { + continue + } + nonOverlappingStats := r.NonOverlappingStats() + for name := range nonOverlappingStats { + validNonOverlappingStats[name] = struct{}{} + } + var totalRangeTime time.Duration + for name, dt := range r.RangeTime { + validRangeStats[name] = struct{}{} + totalRangeTime += dt + } + regions = append(regions, region{ + UserRegionSummary: r, + Goroutine: g.ID, + NonOverlappingStats: nonOverlappingStats, + HasRangeTime: totalRangeTime != 0, + }) + if maxTotal < r.TotalTime { + maxTotal = r.TotalTime + } + } + } + + // Sort. + sortBy := r.FormValue("sortby") + if _, ok := validNonOverlappingStats[sortBy]; ok { + slices.SortFunc(regions, func(a, b region) int { + return cmp.Compare(b.NonOverlappingStats[sortBy], a.NonOverlappingStats[sortBy]) + }) + } else { + // Sort by total time by default. + slices.SortFunc(regions, func(a, b region) int { + return cmp.Compare(b.TotalTime, a.TotalTime) + }) + } + + // Write down all the non-overlapping stats and sort them. + allNonOverlappingStats := make([]string, 0, len(validNonOverlappingStats)) + for name := range validNonOverlappingStats { + allNonOverlappingStats = append(allNonOverlappingStats, name) + } + slices.SortFunc(allNonOverlappingStats, func(a, b string) int { + if a == b { + return 0 + } + if a == "Execution time" { + return -1 + } + if b == "Execution time" { + return 1 + } + return cmp.Compare(a, b) + }) + + // Write down all the range stats and sort them. + allRangeStats := make([]string, 0, len(validRangeStats)) + for name := range validRangeStats { + allRangeStats = append(allRangeStats, name) + } + sort.Strings(allRangeStats) + + err = templUserRegionType.Execute(w, struct { + MaxTotal time.Duration + Regions []region + Name string + Filter *regionFilter + NonOverlappingStats []string + RangeStats []string + }{ + MaxTotal: maxTotal, + Regions: regions, + Name: filter.name, + Filter: filter, + NonOverlappingStats: allNonOverlappingStats, + RangeStats: allRangeStats, + }) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } + } +} + +var templUserRegionType = template.Must(template.New("").Funcs(template.FuncMap{ + "headerStyle": func(statName string) template.HTMLAttr { + return template.HTMLAttr(fmt.Sprintf("style=\"background-color: %s;\"", stat2Color(statName))) + }, + "barStyle": func(statName string, dividend, divisor time.Duration) template.HTMLAttr { + width := "0" + if divisor != 0 { + width = fmt.Sprintf("%.2f%%", float64(dividend)/float64(divisor)*100) + } + return template.HTMLAttr(fmt.Sprintf("style=\"width: %s; background-color: %s;\"", width, stat2Color(statName))) + }, + "filterParams": func(f *regionFilter) template.URL { + return template.URL(f.params.Encode()) + }, +}).Parse(` + +Regions: {{.Name}} + + + + +

Regions: {{.Name}}

+ +Table of contents + + +

Summary

+ +{{ with $p := filterParams .Filter}} + + + + + + + + + + + + + + + + + +
Network wait profile: graph (download)
Sync block profile: graph (download)
Syscall profile: graph (download)
Scheduler wait profile: graph (download)
+{{ end }} + +

Breakdown

+ +The table below breaks down where each goroutine is spent its time during the +traced period. +All of the columns except total time are non-overlapping. +
+
+ + + + + + + +{{range $.NonOverlappingStats}} + +{{end}} + +{{range .Regions}} + + + + + + {{$Region := .}} + {{range $.NonOverlappingStats}} + {{$Time := index $Region.NonOverlappingStats .}} + + {{end}} + +{{end}} +
Goroutine Task Total {{.}}
{{.Goroutine}} {{if .TaskID}}{{.TaskID}}{{end}} {{ .TotalTime.String }} +
+ {{$Region := .}} + {{range $.NonOverlappingStats}} + {{$Time := index $Region.NonOverlappingStats .}} + {{if $Time}} +   + {{end}} + {{end}} +
+
{{$Time.String}}
+ +

Special ranges

+ +The table below describes how much of the traced period each goroutine spent in +certain special time ranges. +If a goroutine has spent no time in any special time ranges, it is excluded from +the table. +For example, how much time it spent helping the GC. Note that these times do +overlap with the times from the first table. +In general the goroutine may not be executing in these special time ranges. +For example, it may have blocked while trying to help the GC. +This must be taken into account when interpreting the data. +
+
+ + + + + + +{{range $.RangeStats}} + +{{end}} + +{{range .Regions}} + {{if .HasRangeTime}} + + + + + {{$Region := .}} + {{range $.RangeStats}} + {{$Time := index $Region.RangeTime .}} + + {{end}} + + {{end}} +{{end}} +
Goroutine Task Total {{.}}
{{.Goroutine}} {{if .TaskID}}{{.TaskID}}{{end}} {{ .TotalTime.String }} {{$Time.String}}
+`)) + +// regionFilter represents a region filter specified by a user of cmd/trace. +type regionFilter struct { + name string + params url.Values + cond []func(*parsedTrace, *trace.UserRegionSummary) bool +} + +// match returns true if a region, described by its ID and summary, matches +// the filter. +func (f *regionFilter) match(t *parsedTrace, s *trace.UserRegionSummary) bool { + for _, c := range f.cond { + if !c(t, s) { + return false + } + } + return true +} + +// newRegionFilter creates a new region filter from URL query variables. +func newRegionFilter(r *http.Request) (*regionFilter, error) { + if err := r.ParseForm(); err != nil { + return nil, err + } + + var name []string + var conditions []func(*parsedTrace, *trace.UserRegionSummary) bool + filterParams := make(url.Values) + + param := r.Form + if typ, ok := param["type"]; ok && len(typ) > 0 { + name = append(name, fmt.Sprintf("%q", typ[0])) + conditions = append(conditions, func(_ *parsedTrace, r *trace.UserRegionSummary) bool { + return r.Name == typ[0] + }) + filterParams.Add("type", typ[0]) + } + if pc, err := strconv.ParseUint(r.FormValue("pc"), 16, 64); err == nil { + encPC := fmt.Sprintf("0x%x", pc) + name = append(name, "@ "+encPC) + conditions = append(conditions, func(_ *parsedTrace, r *trace.UserRegionSummary) bool { + return regionTopStackFrame(r).PC == pc + }) + filterParams.Add("pc", encPC) + } + + if lat, err := time.ParseDuration(r.FormValue("latmin")); err == nil { + name = append(name, fmt.Sprintf("(latency >= %s)", lat)) + conditions = append(conditions, func(t *parsedTrace, r *trace.UserRegionSummary) bool { + return regionInterval(t, r).duration() >= lat + }) + filterParams.Add("latmin", lat.String()) + } + if lat, err := time.ParseDuration(r.FormValue("latmax")); err == nil { + name = append(name, fmt.Sprintf("(latency <= %s)", lat)) + conditions = append(conditions, func(t *parsedTrace, r *trace.UserRegionSummary) bool { + return regionInterval(t, r).duration() <= lat + }) + filterParams.Add("latmax", lat.String()) + } + + return ®ionFilter{ + name: strings.Join(name, " "), + cond: conditions, + params: filterParams, + }, nil +} + +func regionInterval(t *parsedTrace, s *trace.UserRegionSummary) interval { + var i interval + if s.Start != nil { + i.start = s.Start.Time() + } else { + i.start = t.startTime() + } + if s.End != nil { + i.end = s.End.Time() + } else { + i.end = t.endTime() + } + return i +} diff --git a/src/cmd/trace/v2/tasks.go b/src/cmd/trace/v2/tasks.go new file mode 100644 index 0000000000..fb40811565 --- /dev/null +++ b/src/cmd/trace/v2/tasks.go @@ -0,0 +1,477 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "bytes" + "cmp" + "fmt" + "html/template" + "internal/trace" + "internal/trace/traceviewer" + tracev2 "internal/trace/v2" + "log" + "net/http" + "slices" + "strings" + "time" +) + +// UserTasksHandlerFunc returns a HandlerFunc that reports all tasks found in the trace. +func UserTasksHandlerFunc(t *parsedTrace) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + tasks := t.summary.Tasks + + // Summarize groups of tasks with the same name. + summary := make(map[string]taskStats) + for _, task := range tasks { + stats, ok := summary[task.Name] + if !ok { + stats.Type = task.Name + } + stats.add(task) + summary[task.Name] = stats + } + + // Sort tasks by type. + userTasks := make([]taskStats, 0, len(summary)) + for _, stats := range summary { + userTasks = append(userTasks, stats) + } + slices.SortFunc(userTasks, func(a, b taskStats) int { + return cmp.Compare(a.Type, b.Type) + }) + + // Emit table. + err := templUserTaskTypes.Execute(w, userTasks) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } + } +} + +type taskStats struct { + Type string + Count int // Complete + incomplete tasks + Histogram traceviewer.TimeHistogram // Complete tasks only +} + +func (s *taskStats) UserTaskURL(complete bool) func(min, max time.Duration) string { + return func(min, max time.Duration) string { + return fmt.Sprintf("/usertask?type=%s&complete=%v&latmin=%v&latmax=%v", template.URLQueryEscaper(s.Type), template.URLQueryEscaper(complete), template.URLQueryEscaper(min), template.URLQueryEscaper(max)) + } +} + +func (s *taskStats) add(task *trace.UserTaskSummary) { + s.Count++ + if task.Complete() { + s.Histogram.Add(task.End.Time().Sub(task.Start.Time())) + } +} + +var templUserTaskTypes = template.Must(template.New("").Parse(` + +Tasks + + +Search log text:

+ + + + + + +{{range $}} + + + + + +{{end}} +
Task typeCountDuration distribution (complete tasks)
{{.Type}}{{.Count}}{{.Histogram.ToHTML (.UserTaskURL true)}}
+ + +`)) + +// UserTaskHandlerFunc returns a HandlerFunc that presents the details of the selected tasks. +func UserTaskHandlerFunc(t *parsedTrace) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + filter, err := newTaskFilter(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + type event struct { + WhenString string + Elapsed time.Duration + Goroutine tracev2.GoID + What string + // TODO: include stack trace of creation time + } + type task struct { + WhenString string + ID tracev2.TaskID + Duration time.Duration + Complete bool + Events []event + Start, End time.Duration // Time since the beginning of the trace + GCTime time.Duration + } + var tasks []task + for _, summary := range t.summary.Tasks { + if !filter.match(t, summary) { + continue + } + + // Collect all the events for the task. + var rawEvents []*tracev2.Event + if summary.Start != nil { + rawEvents = append(rawEvents, summary.Start) + } + if summary.End != nil { + rawEvents = append(rawEvents, summary.End) + } + rawEvents = append(rawEvents, summary.Logs...) + for _, r := range summary.Regions { + if r.Start != nil { + rawEvents = append(rawEvents, r.Start) + } + if r.End != nil { + rawEvents = append(rawEvents, r.End) + } + } + + // Sort them. + slices.SortStableFunc(rawEvents, func(a, b *tracev2.Event) int { + return cmp.Compare(a.Time(), b.Time()) + }) + + // Summarize them. + var events []event + last := t.startTime() + for _, ev := range rawEvents { + what := describeEvent(ev) + if what == "" { + continue + } + sinceStart := ev.Time().Sub(t.startTime()) + events = append(events, event{ + WhenString: fmt.Sprintf("%2.9f", sinceStart.Seconds()), + Elapsed: ev.Time().Sub(last), + What: what, + Goroutine: primaryGoroutine(ev), + }) + last = ev.Time() + } + taskSpan := taskInterval(t, summary) + taskStart := taskSpan.start.Sub(t.startTime()) + + // Produce the task summary. + tasks = append(tasks, task{ + WhenString: fmt.Sprintf("%2.9fs", taskStart.Seconds()), + Duration: taskSpan.duration(), + ID: summary.ID, + Complete: summary.Complete(), + Events: events, + Start: taskStart, + End: taskStart + taskSpan.duration(), + }) + } + // Sort the tasks by duration. + slices.SortFunc(tasks, func(a, b task) int { + return cmp.Compare(a.Duration, b.Duration) + }) + + // Emit table. + err = templUserTaskType.Execute(w, struct { + Name string + Tasks []task + }{ + Name: filter.name, + Tasks: tasks, + }) + if err != nil { + log.Printf("failed to execute template: %v", err) + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } + } +} + +var templUserTaskType = template.Must(template.New("userTask").Funcs(template.FuncMap{ + "elapsed": elapsed, + "asMillisecond": asMillisecond, + "trimSpace": strings.TrimSpace, +}).Parse(` + +Tasks: {{.Name}} + + + +

User Task: {{.Name}}

+ +Search log text:
+ +

+ + + + + + + + + {{range $el := $.Tasks}} + + + + + + + {{range $el.Events}} + + + + + + + {{end}} + {{end}} + + +`)) + +// taskFilter represents a task filter specified by a user of cmd/trace. +type taskFilter struct { + name string + cond []func(*parsedTrace, *trace.UserTaskSummary) bool +} + +// match returns true if a task, described by its ID and summary, matches +// the filter. +func (f *taskFilter) match(t *parsedTrace, task *trace.UserTaskSummary) bool { + if t == nil { + return false + } + for _, c := range f.cond { + if !c(t, task) { + return false + } + } + return true +} + +// newTaskFilter creates a new task filter from URL query variables. +func newTaskFilter(r *http.Request) (*taskFilter, error) { + if err := r.ParseForm(); err != nil { + return nil, err + } + + var name []string + var conditions []func(*parsedTrace, *trace.UserTaskSummary) bool + + param := r.Form + if typ, ok := param["type"]; ok && len(typ) > 0 { + name = append(name, fmt.Sprintf("%q", typ[0])) + conditions = append(conditions, func(_ *parsedTrace, task *trace.UserTaskSummary) bool { + return task.Name == typ[0] + }) + } + if complete := r.FormValue("complete"); complete == "1" { + name = append(name, "complete") + conditions = append(conditions, func(_ *parsedTrace, task *trace.UserTaskSummary) bool { + return task.Complete() + }) + } else if complete == "0" { + name = append(name, "incomplete") + conditions = append(conditions, func(_ *parsedTrace, task *trace.UserTaskSummary) bool { + return !task.Complete() + }) + } + if lat, err := time.ParseDuration(r.FormValue("latmin")); err == nil { + name = append(name, fmt.Sprintf("latency >= %s", lat)) + conditions = append(conditions, func(t *parsedTrace, task *trace.UserTaskSummary) bool { + return task.Complete() && taskInterval(t, task).duration() >= lat + }) + } + if lat, err := time.ParseDuration(r.FormValue("latmax")); err == nil { + name = append(name, fmt.Sprintf("latency <= %s", lat)) + conditions = append(conditions, func(t *parsedTrace, task *trace.UserTaskSummary) bool { + return task.Complete() && taskInterval(t, task).duration() <= lat + }) + } + if text := r.FormValue("logtext"); text != "" { + name = append(name, fmt.Sprintf("log contains %q", text)) + conditions = append(conditions, func(_ *parsedTrace, task *trace.UserTaskSummary) bool { + return taskMatches(task, text) + }) + } + + return &taskFilter{name: strings.Join(name, ","), cond: conditions}, nil +} + +func taskInterval(t *parsedTrace, s *trace.UserTaskSummary) interval { + var i interval + if s.Start != nil { + i.start = s.Start.Time() + } else { + i.start = t.startTime() + } + if s.End != nil { + i.end = s.End.Time() + } else { + i.end = t.endTime() + } + return i +} + +func taskMatches(t *trace.UserTaskSummary, text string) bool { + matches := func(s string) bool { + return strings.Contains(s, text) + } + if matches(t.Name) { + return true + } + for _, r := range t.Regions { + if matches(r.Name) { + return true + } + } + for _, ev := range t.Logs { + log := ev.Log() + if matches(log.Category) { + return true + } + if matches(log.Message) { + return true + } + } + return false +} + +func describeEvent(ev *tracev2.Event) string { + switch ev.Kind() { + case tracev2.EventStateTransition: + st := ev.StateTransition() + if st.Resource.Kind != tracev2.ResourceGoroutine { + return "" + } + old, new := st.Goroutine() + return fmt.Sprintf("%s -> %s", old, new) + case tracev2.EventRegionBegin: + return fmt.Sprintf("region %q begin", ev.Region().Type) + case tracev2.EventRegionEnd: + return fmt.Sprintf("region %q end", ev.Region().Type) + case tracev2.EventTaskBegin: + t := ev.Task() + return fmt.Sprintf("task %q (D %d, parent %d) begin", t.Type, t.ID, t.Parent) + case tracev2.EventTaskEnd: + return "task end" + case tracev2.EventLog: + log := ev.Log() + if log.Category != "" { + return fmt.Sprintf("log %q", log.Message) + } + return fmt.Sprintf("log (category: %s): %q", log.Category, log.Message) + } + return "" +} + +func primaryGoroutine(ev *tracev2.Event) tracev2.GoID { + if ev.Kind() != tracev2.EventStateTransition { + return ev.Goroutine() + } + st := ev.StateTransition() + if st.Resource.Kind != tracev2.ResourceGoroutine { + return tracev2.NoGoroutine + } + return st.Resource.Goroutine() +} + +func elapsed(d time.Duration) string { + b := fmt.Appendf(nil, "%.9f", d.Seconds()) + + // For subsecond durations, blank all zeros before decimal point, + // and all zeros between the decimal point and the first non-zero digit. + if d < time.Second { + dot := bytes.IndexByte(b, '.') + for i := 0; i < dot; i++ { + b[i] = ' ' + } + for i := dot + 1; i < len(b); i++ { + if b[i] == '0' { + b[i] = ' ' + } else { + break + } + } + } + return string(b) +} + +func asMillisecond(d time.Duration) float64 { + return float64(d.Nanoseconds()) / float64(time.Millisecond) +} diff --git a/src/internal/trace/summary.go b/src/internal/trace/summary.go index f57146e950..934a980c7c 100644 --- a/src/internal/trace/summary.go +++ b/src/internal/trace/summary.go @@ -64,6 +64,12 @@ type UserTaskSummary struct { Goroutines map[tracev2.GoID]*GoroutineSummary } +// Complete returns true if we have complete information about the task +// from the trace: both a start and an end. +func (s *UserTaskSummary) Complete() bool { + return s.Start != nil && s.End != nil +} + // UserRegionSummary represents a region and goroutine execution stats // while the region was active. (For v2 traces.) type UserRegionSummary struct {
WhenElapsedGoroutineEvents
{{$el.WhenString}}{{$el.Duration}} + Task {{$el.ID}} + (goroutine view) + ({{if .Complete}}complete{{else}}incomplete{{end}}) +
{{.WhenString}}{{elapsed .Elapsed}}{{.Goroutine}}{{.What}}