From 2f6a25f4478905db5e169019bf9dc39ab2a50f89 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 6 Dec 2023 14:20:49 -0500 Subject: [PATCH] internal/profile: remove legacy profile support internal/profile.Parse is only used in two places: cmd/compile for parsing PGO profiles, and net/http/pprof for parsing runtime/pprof profiles for delta profiles. Neither case ever encounters legacy profiles, so we can remove support entirely from the package. Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest Change-Id: Ic5f85b3fc1e1367131b6039dac9378913cbf9f2c Reviewed-on: https://go-review.googlesource.com/c/go/+/548035 Auto-Submit: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/internal/profile/legacy_profile.go | 1268 ------------------------ src/internal/profile/profile.go | 63 +- src/internal/profile/profile_test.go | 64 -- src/runtime/pprof/proto_test.go | 31 +- 4 files changed, 24 insertions(+), 1402 deletions(-) delete mode 100644 src/internal/profile/legacy_profile.go delete mode 100644 src/internal/profile/profile_test.go diff --git a/src/internal/profile/legacy_profile.go b/src/internal/profile/legacy_profile.go deleted file mode 100644 index 373a6c04ca..0000000000 --- a/src/internal/profile/legacy_profile.go +++ /dev/null @@ -1,1268 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file implements parsers to convert legacy profiles into the -// profile.proto format. - -package profile - -import ( - "bufio" - "bytes" - "fmt" - "internal/lazyregexp" - "io" - "math" - "strconv" - "strings" -) - -var ( - countStartRE = lazyregexp.New(`\A(\w+) profile: total \d+\n\z`) - countRE = lazyregexp.New(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`) - - heapHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`) - heapSampleRE = lazyregexp.New(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`) - - contentionSampleRE = lazyregexp.New(`(\d+) *(\d+) @([ x0-9a-f]*)`) - - hexNumberRE = lazyregexp.New(`0x[0-9a-f]+`) - - growthHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`) - - fragmentationHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`) - - threadzStartRE = lazyregexp.New(`--- threadz \d+ ---`) - threadStartRE = lazyregexp.New(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`) - - procMapsRE = lazyregexp.New(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`) - - briefMapsRE = lazyregexp.New(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`) - - // LegacyHeapAllocated instructs the heapz parsers to use the - // allocated memory stats instead of the default in-use memory. Note - // that tcmalloc doesn't provide all allocated memory, only in-use - // stats. - LegacyHeapAllocated bool -) - -func isSpaceOrComment(line string) bool { - trimmed := strings.TrimSpace(line) - return len(trimmed) == 0 || trimmed[0] == '#' -} - -// parseGoCount parses a Go count profile (e.g., threadcreate or -// goroutine) and returns a new Profile. -func parseGoCount(b []byte) (*Profile, error) { - r := bytes.NewBuffer(b) - - var line string - var err error - for { - // Skip past comments and empty lines seeking a real header. - line, err = r.ReadString('\n') - if err != nil { - return nil, err - } - if !isSpaceOrComment(line) { - break - } - } - - m := countStartRE.FindStringSubmatch(line) - if m == nil { - return nil, errUnrecognized - } - profileType := m[1] - p := &Profile{ - PeriodType: &ValueType{Type: profileType, Unit: "count"}, - Period: 1, - SampleType: []*ValueType{{Type: profileType, Unit: "count"}}, - } - locations := make(map[uint64]*Location) - for { - line, err = r.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - return nil, err - } - if isSpaceOrComment(line) { - continue - } - if strings.HasPrefix(line, "---") { - break - } - m := countRE.FindStringSubmatch(line) - if m == nil { - return nil, errMalformed - } - n, err := strconv.ParseInt(m[1], 0, 64) - if err != nil { - return nil, errMalformed - } - fields := strings.Fields(m[2]) - locs := make([]*Location, 0, len(fields)) - for _, stk := range fields { - addr, err := strconv.ParseUint(stk, 0, 64) - if err != nil { - return nil, errMalformed - } - // Adjust all frames by -1 to land on the call instruction. - addr-- - loc := locations[addr] - if loc == nil { - loc = &Location{ - Address: addr, - } - locations[addr] = loc - p.Location = append(p.Location, loc) - } - locs = append(locs, loc) - } - p.Sample = append(p.Sample, &Sample{ - Location: locs, - Value: []int64{n}, - }) - } - - if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil { - return nil, err - } - return p, nil -} - -// remapLocationIDs ensures there is a location for each address -// referenced by a sample, and remaps the samples to point to the new -// location ids. -func (p *Profile) remapLocationIDs() { - seen := make(map[*Location]bool, len(p.Location)) - var locs []*Location - - for _, s := range p.Sample { - for _, l := range s.Location { - if seen[l] { - continue - } - l.ID = uint64(len(locs) + 1) - locs = append(locs, l) - seen[l] = true - } - } - p.Location = locs -} - -func (p *Profile) remapFunctionIDs() { - seen := make(map[*Function]bool, len(p.Function)) - var fns []*Function - - for _, l := range p.Location { - for _, ln := range l.Line { - fn := ln.Function - if fn == nil || seen[fn] { - continue - } - fn.ID = uint64(len(fns) + 1) - fns = append(fns, fn) - seen[fn] = true - } - } - p.Function = fns -} - -// remapMappingIDs matches location addresses with existing mappings -// and updates them appropriately. This is O(N*M), if this ever shows -// up as a bottleneck, evaluate sorting the mappings and doing a -// binary search, which would make it O(N*log(M)). -func (p *Profile) remapMappingIDs() { - if len(p.Mapping) == 0 { - return - } - - // Some profile handlers will incorrectly set regions for the main - // executable if its section is remapped. Fix them through heuristics. - - // Remove the initial mapping if named '/anon_hugepage' and has a - // consecutive adjacent mapping. - if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") { - if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start { - p.Mapping = p.Mapping[1:] - } - } - - for _, l := range p.Location { - if a := l.Address; a != 0 { - for _, m := range p.Mapping { - if m.Start <= a && a < m.Limit { - l.Mapping = m - break - } - } - } - } - - // Reset all mapping IDs. - for i, m := range p.Mapping { - m.ID = uint64(i + 1) - } -} - -var cpuInts = []func([]byte) (uint64, []byte){ - get32l, - get32b, - get64l, - get64b, -} - -func get32l(b []byte) (uint64, []byte) { - if len(b) < 4 { - return 0, nil - } - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:] -} - -func get32b(b []byte) (uint64, []byte) { - if len(b) < 4 { - return 0, nil - } - return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:] -} - -func get64l(b []byte) (uint64, []byte) { - if len(b) < 8 { - return 0, nil - } - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:] -} - -func get64b(b []byte) (uint64, []byte) { - if len(b) < 8 { - return 0, nil - } - return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:] -} - -// ParseTracebacks parses a set of tracebacks and returns a newly -// populated profile. It will accept any text file and generate a -// Profile out of it with any hex addresses it can identify, including -// a process map if it can recognize one. Each sample will include a -// tag "source" with the addresses recognized in string format. -func ParseTracebacks(b []byte) (*Profile, error) { - r := bytes.NewBuffer(b) - - p := &Profile{ - PeriodType: &ValueType{Type: "trace", Unit: "count"}, - Period: 1, - SampleType: []*ValueType{ - {Type: "trace", Unit: "count"}, - }, - } - - var sources []string - var sloc []*Location - - locs := make(map[uint64]*Location) - for { - l, err := r.ReadString('\n') - if err != nil { - if err != io.EOF { - return nil, err - } - if l == "" { - break - } - } - if sectionTrigger(l) == memoryMapSection { - break - } - if s, addrs := extractHexAddresses(l); len(s) > 0 { - for _, addr := range addrs { - // Addresses from stack traces point to the next instruction after - // each call. Adjust by -1 to land somewhere on the actual call. - addr-- - loc := locs[addr] - if locs[addr] == nil { - loc = &Location{ - Address: addr, - } - p.Location = append(p.Location, loc) - locs[addr] = loc - } - sloc = append(sloc, loc) - } - - sources = append(sources, s...) - } else { - if len(sources) > 0 || len(sloc) > 0 { - addTracebackSample(sloc, sources, p) - sloc, sources = nil, nil - } - } - } - - // Add final sample to save any leftover data. - if len(sources) > 0 || len(sloc) > 0 { - addTracebackSample(sloc, sources, p) - } - - if err := p.ParseMemoryMap(r); err != nil { - return nil, err - } - return p, nil -} - -func addTracebackSample(l []*Location, s []string, p *Profile) { - p.Sample = append(p.Sample, - &Sample{ - Value: []int64{1}, - Location: l, - Label: map[string][]string{"source": s}, - }) -} - -// parseCPU parses a profilez legacy profile and returns a newly -// populated Profile. -// -// The general format for profilez samples is a sequence of words in -// binary format. The first words are a header with the following data: -// -// 1st word -- 0 -// 2nd word -- 3 -// 3rd word -- 0 if a c++ application, 1 if a java application. -// 4th word -- Sampling period (in microseconds). -// 5th word -- Padding. -func parseCPU(b []byte) (*Profile, error) { - var parse func([]byte) (uint64, []byte) - var n1, n2, n3, n4, n5 uint64 - for _, parse = range cpuInts { - var tmp []byte - n1, tmp = parse(b) - n2, tmp = parse(tmp) - n3, tmp = parse(tmp) - n4, tmp = parse(tmp) - n5, tmp = parse(tmp) - - if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 { - b = tmp - return cpuProfile(b, int64(n4), parse) - } - } - return nil, errUnrecognized -} - -// cpuProfile returns a new Profile from C++ profilez data. -// b is the profile bytes after the header, period is the profiling -// period, and parse is a function to parse 8-byte chunks from the -// profile in its native endianness. -func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) { - p := &Profile{ - Period: period * 1000, - PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"}, - SampleType: []*ValueType{ - {Type: "samples", Unit: "count"}, - {Type: "cpu", Unit: "nanoseconds"}, - }, - } - var err error - if b, _, err = parseCPUSamples(b, parse, true, p); err != nil { - return nil, err - } - - // If all samples have the same second-to-the-bottom frame, it - // strongly suggests that it is an uninteresting artifact of - // measurement -- a stack frame pushed by the signal handler. The - // bottom frame is always correct as it is picked up from the signal - // structure, not the stack. Check if this is the case and if so, - // remove. - if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 { - allSame := true - id1 := p.Sample[0].Location[1].Address - for _, s := range p.Sample { - if len(s.Location) < 2 || id1 != s.Location[1].Address { - allSame = false - break - } - } - if allSame { - for _, s := range p.Sample { - s.Location = append(s.Location[:1], s.Location[2:]...) - } - } - } - - if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil { - return nil, err - } - return p, nil -} - -// parseCPUSamples parses a collection of profilez samples from a -// profile. -// -// profilez samples are a repeated sequence of stack frames of the -// form: -// -// 1st word -- The number of times this stack was encountered. -// 2nd word -- The size of the stack (StackSize). -// 3rd word -- The first address on the stack. -// ... -// StackSize + 2 -- The last address on the stack -// -// The last stack trace is of the form: -// -// 1st word -- 0 -// 2nd word -- 1 -// 3rd word -- 0 -// -// Addresses from stack traces may point to the next instruction after -// each call. Optionally adjust by -1 to land somewhere on the actual -// call (except for the leaf, which is not a call). -func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) { - locs := make(map[uint64]*Location) - for len(b) > 0 { - var count, nstk uint64 - count, b = parse(b) - nstk, b = parse(b) - if b == nil || nstk > uint64(len(b)/4) { - return nil, nil, errUnrecognized - } - var sloc []*Location - addrs := make([]uint64, nstk) - for i := 0; i < int(nstk); i++ { - addrs[i], b = parse(b) - } - - if count == 0 && nstk == 1 && addrs[0] == 0 { - // End of data marker - break - } - for i, addr := range addrs { - if adjust && i > 0 { - addr-- - } - loc := locs[addr] - if loc == nil { - loc = &Location{ - Address: addr, - } - locs[addr] = loc - p.Location = append(p.Location, loc) - } - sloc = append(sloc, loc) - } - p.Sample = append(p.Sample, - &Sample{ - Value: []int64{int64(count), int64(count) * p.Period}, - Location: sloc, - }) - } - // Reached the end without finding the EOD marker. - return b, locs, nil -} - -// parseHeap parses a heapz legacy or a growthz profile and -// returns a newly populated Profile. -func parseHeap(b []byte) (p *Profile, err error) { - r := bytes.NewBuffer(b) - l, err := r.ReadString('\n') - if err != nil { - return nil, errUnrecognized - } - - sampling := "" - - if header := heapHeaderRE.FindStringSubmatch(l); header != nil { - p = &Profile{ - SampleType: []*ValueType{ - {Type: "objects", Unit: "count"}, - {Type: "space", Unit: "bytes"}, - }, - PeriodType: &ValueType{Type: "objects", Unit: "bytes"}, - } - - var period int64 - if len(header[6]) > 0 { - if period, err = strconv.ParseInt(header[6], 10, 64); err != nil { - return nil, errUnrecognized - } - } - - switch header[5] { - case "heapz_v2", "heap_v2": - sampling, p.Period = "v2", period - case "heapprofile": - sampling, p.Period = "", 1 - case "heap": - sampling, p.Period = "v2", period/2 - default: - return nil, errUnrecognized - } - } else if header = growthHeaderRE.FindStringSubmatch(l); header != nil { - p = &Profile{ - SampleType: []*ValueType{ - {Type: "objects", Unit: "count"}, - {Type: "space", Unit: "bytes"}, - }, - PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"}, - Period: 1, - } - } else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil { - p = &Profile{ - SampleType: []*ValueType{ - {Type: "objects", Unit: "count"}, - {Type: "space", Unit: "bytes"}, - }, - PeriodType: &ValueType{Type: "allocations", Unit: "count"}, - Period: 1, - } - } else { - return nil, errUnrecognized - } - - if LegacyHeapAllocated { - for _, st := range p.SampleType { - st.Type = "alloc_" + st.Type - } - } else { - for _, st := range p.SampleType { - st.Type = "inuse_" + st.Type - } - } - - locs := make(map[uint64]*Location) - for { - l, err = r.ReadString('\n') - if err != nil { - if err != io.EOF { - return nil, err - } - - if l == "" { - break - } - } - - if isSpaceOrComment(l) { - continue - } - l = strings.TrimSpace(l) - - if sectionTrigger(l) != unrecognizedSection { - break - } - - value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling) - if err != nil { - return nil, err - } - var sloc []*Location - for _, addr := range addrs { - // Addresses from stack traces point to the next instruction after - // each call. Adjust by -1 to land somewhere on the actual call. - addr-- - loc := locs[addr] - if locs[addr] == nil { - loc = &Location{ - Address: addr, - } - p.Location = append(p.Location, loc) - locs[addr] = loc - } - sloc = append(sloc, loc) - } - - p.Sample = append(p.Sample, &Sample{ - Value: value, - Location: sloc, - NumLabel: map[string][]int64{"bytes": {blocksize}}, - }) - } - - if err = parseAdditionalSections(l, r, p); err != nil { - return nil, err - } - return p, nil -} - -// parseHeapSample parses a single row from a heap profile into a new Sample. -func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) { - sampleData := heapSampleRE.FindStringSubmatch(line) - if len(sampleData) != 6 { - return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData)) - } - - // Use first two values by default; tcmalloc sampling generates the - // same value for both, only the older heap-profile collect separate - // stats for in-use and allocated objects. - valueIndex := 1 - if LegacyHeapAllocated { - valueIndex = 3 - } - - var v1, v2 int64 - if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil { - return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) - } - if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil { - return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) - } - - if v1 == 0 { - if v2 != 0 { - return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2) - } - } else { - blocksize = v2 / v1 - if sampling == "v2" { - v1, v2 = scaleHeapSample(v1, v2, rate) - } - } - - value = []int64{v1, v2} - addrs = parseHexAddresses(sampleData[5]) - - return value, blocksize, addrs, nil -} - -// extractHexAddresses extracts hex numbers from a string and returns -// them, together with their numeric value, in a slice. -func extractHexAddresses(s string) ([]string, []uint64) { - hexStrings := hexNumberRE.FindAllString(s, -1) - var ids []uint64 - for _, s := range hexStrings { - if id, err := strconv.ParseUint(s, 0, 64); err == nil { - ids = append(ids, id) - } else { - // Do not expect any parsing failures due to the regexp matching. - panic("failed to parse hex value:" + s) - } - } - return hexStrings, ids -} - -// parseHexAddresses parses hex numbers from a string and returns them -// in a slice. -func parseHexAddresses(s string) []uint64 { - _, ids := extractHexAddresses(s) - return ids -} - -// scaleHeapSample adjusts the data from a heapz Sample to -// account for its probability of appearing in the collected -// data. heapz profiles are a sampling of the memory allocations -// requests in a program. We estimate the unsampled value by dividing -// each collected sample by its probability of appearing in the -// profile. heapz v2 profiles rely on a poisson process to determine -// which samples to collect, based on the desired average collection -// rate R. The probability of a sample of size S to appear in that -// profile is 1-exp(-S/R). -func scaleHeapSample(count, size, rate int64) (int64, int64) { - if count == 0 || size == 0 { - return 0, 0 - } - - if rate <= 1 { - // if rate==1 all samples were collected so no adjustment is needed. - // if rate<1 treat as unknown and skip scaling. - return count, size - } - - avgSize := float64(size) / float64(count) - scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) - - return int64(float64(count) * scale), int64(float64(size) * scale) -} - -// parseContention parses a mutex or contention profile. There are 2 cases: -// "--- contentionz " for legacy C++ profiles (and backwards compatibility) -// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime. -// This code converts the text output from runtime into a *Profile. (In the future -// the runtime might write a serialized Profile directly making this unnecessary.) -func parseContention(b []byte) (*Profile, error) { - r := bytes.NewBuffer(b) - var l string - var err error - for { - // Skip past comments and empty lines seeking a real header. - l, err = r.ReadString('\n') - if err != nil { - return nil, err - } - if !isSpaceOrComment(l) { - break - } - } - - if strings.HasPrefix(l, "--- contentionz ") { - return parseCppContention(r) - } else if strings.HasPrefix(l, "--- mutex:") { - return parseCppContention(r) - } else if strings.HasPrefix(l, "--- contention:") { - return parseCppContention(r) - } - return nil, errUnrecognized -} - -// parseCppContention parses the output from synchronization_profiling.cc -// for backward compatibility, and the compatible (non-debug) block profile -// output from the Go runtime. -func parseCppContention(r *bytes.Buffer) (*Profile, error) { - p := &Profile{ - PeriodType: &ValueType{Type: "contentions", Unit: "count"}, - Period: 1, - SampleType: []*ValueType{ - {Type: "contentions", Unit: "count"}, - {Type: "delay", Unit: "nanoseconds"}, - }, - } - - var cpuHz int64 - var l string - var err error - // Parse text of the form "attribute = value" before the samples. - const delimiter = '=' - for { - l, err = r.ReadString('\n') - if err != nil { - if err != io.EOF { - return nil, err - } - - if l == "" { - break - } - } - if isSpaceOrComment(l) { - continue - } - - if l = strings.TrimSpace(l); l == "" { - continue - } - - if strings.HasPrefix(l, "---") { - break - } - - index := strings.IndexByte(l, delimiter) - if index < 0 { - break - } - key := l[:index] - val := l[index+1:] - - key, val = strings.TrimSpace(key), strings.TrimSpace(val) - var err error - switch key { - case "cycles/second": - if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil { - return nil, errUnrecognized - } - case "sampling period": - if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil { - return nil, errUnrecognized - } - case "ms since reset": - ms, err := strconv.ParseInt(val, 0, 64) - if err != nil { - return nil, errUnrecognized - } - p.DurationNanos = ms * 1000 * 1000 - case "format": - // CPP contentionz profiles don't have format. - return nil, errUnrecognized - case "resolution": - // CPP contentionz profiles don't have resolution. - return nil, errUnrecognized - case "discarded samples": - default: - return nil, errUnrecognized - } - } - - locs := make(map[uint64]*Location) - for { - if !isSpaceOrComment(l) { - if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") { - break - } - value, addrs, err := parseContentionSample(l, p.Period, cpuHz) - if err != nil { - return nil, err - } - var sloc []*Location - for _, addr := range addrs { - // Addresses from stack traces point to the next instruction after - // each call. Adjust by -1 to land somewhere on the actual call. - addr-- - loc := locs[addr] - if locs[addr] == nil { - loc = &Location{ - Address: addr, - } - p.Location = append(p.Location, loc) - locs[addr] = loc - } - sloc = append(sloc, loc) - } - p.Sample = append(p.Sample, &Sample{ - Value: value, - Location: sloc, - }) - } - - if l, err = r.ReadString('\n'); err != nil { - if err != io.EOF { - return nil, err - } - if l == "" { - break - } - } - } - - if err = parseAdditionalSections(l, r, p); err != nil { - return nil, err - } - - return p, nil -} - -// parseContentionSample parses a single row from a contention profile -// into a new Sample. -func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) { - sampleData := contentionSampleRE.FindStringSubmatch(line) - if sampleData == nil { - return value, addrs, errUnrecognized - } - - v1, err := strconv.ParseInt(sampleData[1], 10, 64) - if err != nil { - return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) - } - v2, err := strconv.ParseInt(sampleData[2], 10, 64) - if err != nil { - return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) - } - - // Unsample values if period and cpuHz are available. - // - Delays are scaled to cycles and then to nanoseconds. - // - Contentions are scaled to cycles. - if period > 0 { - if cpuHz > 0 { - cpuGHz := float64(cpuHz) / 1e9 - v1 = int64(float64(v1) * float64(period) / cpuGHz) - } - v2 = v2 * period - } - - value = []int64{v2, v1} - addrs = parseHexAddresses(sampleData[3]) - - return value, addrs, nil -} - -// parseThread parses a Threadz profile and returns a new Profile. -func parseThread(b []byte) (*Profile, error) { - r := bytes.NewBuffer(b) - - var line string - var err error - for { - // Skip past comments and empty lines seeking a real header. - line, err = r.ReadString('\n') - if err != nil { - return nil, err - } - if !isSpaceOrComment(line) { - break - } - } - - if m := threadzStartRE.FindStringSubmatch(line); m != nil { - // Advance over initial comments until first stack trace. - for { - line, err = r.ReadString('\n') - if err != nil { - if err != io.EOF { - return nil, err - } - - if line == "" { - break - } - } - if sectionTrigger(line) != unrecognizedSection || line[0] == '-' { - break - } - } - } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { - return nil, errUnrecognized - } - - p := &Profile{ - SampleType: []*ValueType{{Type: "thread", Unit: "count"}}, - PeriodType: &ValueType{Type: "thread", Unit: "count"}, - Period: 1, - } - - locs := make(map[uint64]*Location) - // Recognize each thread and populate profile samples. - for sectionTrigger(line) == unrecognizedSection { - if strings.HasPrefix(line, "---- no stack trace for") { - line = "" - break - } - if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { - return nil, errUnrecognized - } - - var addrs []uint64 - line, addrs, err = parseThreadSample(r) - if err != nil { - return nil, errUnrecognized - } - if len(addrs) == 0 { - // We got a --same as previous threads--. Bump counters. - if len(p.Sample) > 0 { - s := p.Sample[len(p.Sample)-1] - s.Value[0]++ - } - continue - } - - var sloc []*Location - for _, addr := range addrs { - // Addresses from stack traces point to the next instruction after - // each call. Adjust by -1 to land somewhere on the actual call. - addr-- - loc := locs[addr] - if locs[addr] == nil { - loc = &Location{ - Address: addr, - } - p.Location = append(p.Location, loc) - locs[addr] = loc - } - sloc = append(sloc, loc) - } - - p.Sample = append(p.Sample, &Sample{ - Value: []int64{1}, - Location: sloc, - }) - } - - if err = parseAdditionalSections(line, r, p); err != nil { - return nil, err - } - - return p, nil -} - -// parseThreadSample parses a symbolized or unsymbolized stack trace. -// Returns the first line after the traceback, the sample (or nil if -// it hits a 'same-as-previous' marker) and an error. -func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) { - var l string - sameAsPrevious := false - for { - if l, err = b.ReadString('\n'); err != nil { - if err != io.EOF { - return "", nil, err - } - if l == "" { - break - } - } - if l = strings.TrimSpace(l); l == "" { - continue - } - - if strings.HasPrefix(l, "---") { - break - } - if strings.Contains(l, "same as previous thread") { - sameAsPrevious = true - continue - } - - addrs = append(addrs, parseHexAddresses(l)...) - } - - if sameAsPrevious { - return l, nil, nil - } - return l, addrs, nil -} - -// parseAdditionalSections parses any additional sections in the -// profile, ignoring any unrecognized sections. -func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) { - for { - if sectionTrigger(l) == memoryMapSection { - break - } - // Ignore any unrecognized sections. - if l, err := b.ReadString('\n'); err != nil { - if err != io.EOF { - return err - } - if l == "" { - break - } - } - } - return p.ParseMemoryMap(b) -} - -// ParseMemoryMap parses a memory map in the format of -// /proc/self/maps, and overrides the mappings in the current profile. -// It renumbers the samples and locations in the profile correspondingly. -func (p *Profile) ParseMemoryMap(rd io.Reader) error { - b := bufio.NewReader(rd) - - var attrs []string - var r *strings.Replacer - const delimiter = '=' - for { - l, err := b.ReadString('\n') - if err != nil { - if err != io.EOF { - return err - } - if l == "" { - break - } - } - if l = strings.TrimSpace(l); l == "" { - continue - } - - if r != nil { - l = r.Replace(l) - } - m, err := parseMappingEntry(l) - if err != nil { - if err == errUnrecognized { - // Recognize assignments of the form: attr=value, and replace - // $attr with value on subsequent mappings. - idx := strings.IndexByte(l, delimiter) - if idx >= 0 { - attr := l[:idx] - value := l[idx+1:] - attrs = append(attrs, "$"+strings.TrimSpace(attr), strings.TrimSpace(value)) - r = strings.NewReplacer(attrs...) - } - // Ignore any unrecognized entries - continue - } - return err - } - if m == nil || (m.File == "" && len(p.Mapping) != 0) { - // In some cases the first entry may include the address range - // but not the name of the file. It should be followed by - // another entry with the name. - continue - } - if len(p.Mapping) == 1 && p.Mapping[0].File == "" { - // Update the name if this is the entry following that empty one. - p.Mapping[0].File = m.File - continue - } - p.Mapping = append(p.Mapping, m) - } - p.remapLocationIDs() - p.remapFunctionIDs() - p.remapMappingIDs() - return nil -} - -func parseMappingEntry(l string) (*Mapping, error) { - mapping := &Mapping{} - var err error - if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 { - if !strings.Contains(me[3], "x") { - // Skip non-executable entries. - return nil, nil - } - if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { - return nil, errUnrecognized - } - if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { - return nil, errUnrecognized - } - if me[4] != "" { - if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil { - return nil, errUnrecognized - } - } - mapping.File = me[8] - return mapping, nil - } - - if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 { - if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { - return nil, errUnrecognized - } - if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { - return nil, errUnrecognized - } - mapping.File = me[3] - if me[5] != "" { - if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil { - return nil, errUnrecognized - } - } - return mapping, nil - } - - return nil, errUnrecognized -} - -type sectionType int - -const ( - unrecognizedSection sectionType = iota - memoryMapSection -) - -var memoryMapTriggers = []string{ - "--- Memory map: ---", - "MAPPED_LIBRARIES:", -} - -func sectionTrigger(line string) sectionType { - for _, trigger := range memoryMapTriggers { - if strings.Contains(line, trigger) { - return memoryMapSection - } - } - return unrecognizedSection -} - -func (p *Profile) addLegacyFrameInfo() { - switch { - case isProfileType(p, heapzSampleTypes) || - isProfileType(p, heapzInUseSampleTypes) || - isProfileType(p, heapzAllocSampleTypes): - p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr - case isProfileType(p, contentionzSampleTypes): - p.DropFrames, p.KeepFrames = lockRxStr, "" - default: - p.DropFrames, p.KeepFrames = cpuProfilerRxStr, "" - } -} - -var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles -var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"} -var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"} -var contentionzSampleTypes = []string{"contentions", "delay"} - -func isProfileType(p *Profile, t []string) bool { - st := p.SampleType - if len(st) != len(t) { - return false - } - - for i := range st { - if st[i].Type != t[i] { - return false - } - } - return true -} - -var allocRxStr = strings.Join([]string{ - // POSIX entry points. - `calloc`, - `cfree`, - `malloc`, - `free`, - `memalign`, - `do_memalign`, - `(__)?posix_memalign`, - `pvalloc`, - `valloc`, - `realloc`, - - // TC malloc. - `tcmalloc::.*`, - `tc_calloc`, - `tc_cfree`, - `tc_malloc`, - `tc_free`, - `tc_memalign`, - `tc_posix_memalign`, - `tc_pvalloc`, - `tc_valloc`, - `tc_realloc`, - `tc_new`, - `tc_delete`, - `tc_newarray`, - `tc_deletearray`, - `tc_new_nothrow`, - `tc_newarray_nothrow`, - - // Memory-allocation routines on OS X. - `malloc_zone_malloc`, - `malloc_zone_calloc`, - `malloc_zone_valloc`, - `malloc_zone_realloc`, - `malloc_zone_memalign`, - `malloc_zone_free`, - - // Go runtime - `runtime\..*`, - - // Other misc. memory allocation routines - `BaseArena::.*`, - `(::)?do_malloc_no_errno`, - `(::)?do_malloc_pages`, - `(::)?do_malloc`, - `DoSampledAllocation`, - `MallocedMemBlock::MallocedMemBlock`, - `_M_allocate`, - `__builtin_(vec_)?delete`, - `__builtin_(vec_)?new`, - `__gnu_cxx::new_allocator::allocate`, - `__libc_malloc`, - `__malloc_alloc_template::allocate`, - `allocate`, - `cpp_alloc`, - `operator new(\[\])?`, - `simple_alloc::allocate`, -}, `|`) - -var allocSkipRxStr = strings.Join([]string{ - // Preserve Go runtime frames that appear in the middle/bottom of - // the stack. - `runtime\.panic`, - `runtime\.reflectcall`, - `runtime\.call[0-9]*`, -}, `|`) - -var cpuProfilerRxStr = strings.Join([]string{ - `ProfileData::Add`, - `ProfileData::prof_handler`, - `CpuProfiler::prof_handler`, - `__pthread_sighandler`, - `__restore`, -}, `|`) - -var lockRxStr = strings.Join([]string{ - `RecordLockProfileData`, - `(base::)?RecordLockProfileData.*`, - `(base::)?SubmitMutexProfileData.*`, - `(base::)?SubmitSpinLockProfileData.*`, - `(Mutex::)?AwaitCommon.*`, - `(Mutex::)?Unlock.*`, - `(Mutex::)?UnlockSlow.*`, - `(Mutex::)?ReaderUnlock.*`, - `(MutexLock::)?~MutexLock.*`, - `(SpinLock::)?Unlock.*`, - `(SpinLock::)?SlowUnlock.*`, - `(SpinLockHolder::)?~SpinLockHolder.*`, -}, `|`) diff --git a/src/internal/profile/profile.go b/src/internal/profile/profile.go index 02d1bed3de..db323cc0d9 100644 --- a/src/internal/profile/profile.go +++ b/src/internal/profile/profile.go @@ -11,7 +11,6 @@ import ( "bytes" "compress/gzip" "fmt" - "internal/lazyregexp" "io" "strings" "time" @@ -120,16 +119,14 @@ type Function struct { filenameX int64 } -// Parse parses a profile and checks for its validity. The input -// may be a gzip-compressed encoded protobuf or one of many legacy -// profile formats which may be unsupported in the future. +// Parse parses a profile and checks for its validity. The input must be an +// encoded pprof protobuf, which may optionally be gzip-compressed. func Parse(r io.Reader) (*Profile, error) { orig, err := io.ReadAll(r) if err != nil { return nil, err } - var p *Profile if len(orig) >= 2 && orig[0] == 0x1f && orig[1] == 0x8b { gz, err := gzip.NewReader(bytes.NewBuffer(orig)) if err != nil { @@ -142,13 +139,9 @@ func Parse(r io.Reader) (*Profile, error) { orig = data } - var lErr error - p, pErr := parseUncompressed(orig) - if pErr != nil { - p, lErr = parseLegacy(orig) - } - if pErr != nil && lErr != nil { - return nil, fmt.Errorf("parsing profile: not a valid proto profile (%w) or legacy profile (%w)", pErr, lErr) + p, err := parseUncompressed(orig) + if err != nil { + return nil, fmt.Errorf("parsing profile: %w", err) } if err := p.CheckValid(); err != nil { @@ -161,29 +154,6 @@ var errUnrecognized = fmt.Errorf("unrecognized profile format") var errMalformed = fmt.Errorf("malformed profile format") var ErrNoData = fmt.Errorf("empty input file") -func parseLegacy(data []byte) (*Profile, error) { - parsers := []func([]byte) (*Profile, error){ - parseCPU, - parseHeap, - parseGoCount, // goroutine, threadcreate - parseThread, - parseContention, - } - - for _, parser := range parsers { - p, err := parser(data) - if err == nil { - p.setMain() - p.addLegacyFrameInfo() - return p, nil - } - if err != errUnrecognized { - return nil, err - } - } - return nil, errUnrecognized -} - func parseUncompressed(data []byte) (*Profile, error) { if len(data) == 0 { return nil, ErrNoData @@ -201,29 +171,6 @@ func parseUncompressed(data []byte) (*Profile, error) { return p, nil } -var libRx = lazyregexp.New(`([.]so$|[.]so[._][0-9]+)`) - -// setMain scans Mapping entries and guesses which entry is main -// because legacy profiles don't obey the convention of putting main -// first. -func (p *Profile) setMain() { - for i := 0; i < len(p.Mapping); i++ { - file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", "")) - if len(file) == 0 { - continue - } - if len(libRx.FindStringSubmatch(file)) > 0 { - continue - } - if strings.HasPrefix(file, "[") { - continue - } - // Swap what we guess is main to position 0. - p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i] - break - } -} - // Write writes the profile as a gzip-compressed marshaled protobuf. func (p *Profile) Write(w io.Writer) error { p.preEncode() diff --git a/src/internal/profile/profile_test.go b/src/internal/profile/profile_test.go deleted file mode 100644 index 84158b6233..0000000000 --- a/src/internal/profile/profile_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package profile - -import ( - "testing" -) - -func TestParseContention(t *testing.T) { - tests := []struct { - name string - in string - wantErr bool - }{ - { - name: "valid", - in: `--- mutex: -cycles/second=3491920901 -sampling period=1 -43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31 -34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31 -`, - }, - { - name: "valid with comment", - in: `--- mutex: -cycles/second=3491920901 -sampling period=1 -43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31 -# 0x45e850 sync.(*Mutex).Unlock+0x80 /go/src/sync/mutex.go:126 -# 0x45f763 sync.(*RWMutex).Unlock+0x83 /go/src/sync/rwmutex.go:125 -# 0x4a2be0 main.main.func3+0x70 /go/src/internal/pprof/profile/a_binary.go:58 - -34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31 -# 0x45e850 sync.(*Mutex).Unlock+0x80 /go/src/sync/mutex.go:126 -# 0x45f763 sync.(*RWMutex).Unlock+0x83 /go/src/sync/rwmutex.go:125 -# 0x4a2b16 main.main.func2+0xd6 /go/src/internal/pprof/profile/a_binary.go:48 -`, - }, - { - name: "empty", - in: `--- mutex:`, - wantErr: true, - }, - { - name: "invalid header", - in: `--- channel: -43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31`, - wantErr: true, - }, - } - for _, tc := range tests { - _, err := parseContention([]byte(tc.in)) - if tc.wantErr && err == nil { - t.Errorf("parseContention(%q) succeeded unexpectedly", tc.name) - } - if !tc.wantErr && err != nil { - t.Errorf("parseContention(%q) failed unexpectedly: %v", tc.name, err) - } - } - -} diff --git a/src/runtime/pprof/proto_test.go b/src/runtime/pprof/proto_test.go index 85cd066946..caaaa45f12 100644 --- a/src/runtime/pprof/proto_test.go +++ b/src/runtime/pprof/proto_test.go @@ -86,21 +86,28 @@ func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { if err != nil { t.Fatal(err) } - mprof := &profile.Profile{} - if err = mprof.ParseMemoryMap(bytes.NewReader(mmap)); err != nil { - t.Fatalf("parsing /proc/self/maps: %v", err) - } - if len(mprof.Mapping) < 2 { + var mappings []*profile.Mapping + id := uint64(1) + parseProcSelfMaps(mmap, func(lo, hi, offset uint64, file, buildID string) { + mappings = append(mappings, &profile.Mapping{ + ID: id, + Start: lo, + Limit: hi, + Offset: offset, + File: file, + BuildID: buildID, + }) + id++ + }) + if len(mappings) < 2 { // It is possible for a binary to only have 1 executable // region of memory. - t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping)) + t.Skipf("need 2 or more mappings, got %v", len(mappings)) } - addr1 = mprof.Mapping[0].Start - map1 = mprof.Mapping[0] - map1.BuildID, _ = elfBuildID(map1.File) - addr2 = mprof.Mapping[1].Start - map2 = mprof.Mapping[1] - map2.BuildID, _ = elfBuildID(map2.File) + addr1 = mappings[0].Start + map1 = mappings[0] + addr2 = mappings[1].Start + map2 = mappings[1] case "windows", "darwin", "ios": addr1 = uint64(abi.FuncPCABIInternal(f1)) addr2 = uint64(abi.FuncPCABIInternal(f2)) -- 2.48.1