--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package oldtrace implements a parser for Go execution traces from versions
+// 1.11–1.21.
+//
+// The package started as a copy of Go 1.19's internal/trace, but has been
+// optimized to be faster while using less memory and fewer allocations. It has
+// been further modified for the specific purpose of converting traces to the
+// new 1.22+ format.
+package oldtrace
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "internal/trace/v2/event"
+ "internal/trace/v2/version"
+ "io"
+ "math"
+ "sort"
+)
+
+// Timestamp represents a count of nanoseconds since the beginning of the
+// trace. Timestamps can only be meaningfully compared with other timestamps
+// from the same trace.
+type Timestamp int64
+
+// Event describes one event in the trace.
+type Event struct {
+ // The Event type is carefully laid out to optimize its size and to avoid
+ // pointers, the latter so that the garbage collector won't have to scan any
+ // memory of our millions of events.
+
+ Ts Timestamp // timestamp in nanoseconds
+ G uint64 // G on which the event happened
+ Args [4]uint64 // event-type-specific arguments
+ StkID uint32 // unique stack ID
+ P int32 // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP)
+ Type event.Type // one of Ev*
+}
+
+// Frame is a frame in stack traces.
+type Frame struct {
+ PC uint64
+ // string ID of the function name
+ Fn uint64
+ // string ID of the file name
+ File uint64
+ Line int
+}
+
+const (
+ // Special P identifiers:
+ FakeP = 1000000 + iota
+ TimerP // contains timer unblocks
+ NetpollP // contains network unblocks
+ SyscallP // contains returns from syscalls
+ GCP // contains GC state
+ ProfileP // contains recording of CPU profile samples
+)
+
+// Trace is the result of Parse.
+type Trace struct {
+ Version version.Version
+
+ // Events is the sorted list of Events in the trace.
+ Events Events
+ // Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs
+ // from the trace.
+ Stacks map[uint32][]uint64
+ PCs map[uint64]Frame
+ Strings map[uint64]string
+ InlineStrings []string
+}
+
+// batchOffset records the byte offset of, and number of events in, a batch. A
+// batch is a sequence of events emitted by a P. Events within a single batch
+// are sorted by time.
+type batchOffset struct {
+ offset int
+ numEvents int
+}
+
+type parser struct {
+ ver version.Version
+ data []byte
+ off int
+
+ strings map[uint64]string
+ inlineStrings []string
+ inlineStringsMapping map[string]int
+ // map from Ps to their batch offsets
+ batchOffsets map[int32][]batchOffset
+ stacks map[uint32][]uint64
+ stacksData []uint64
+ ticksPerSec int64
+ pcs map[uint64]Frame
+ cpuSamples []Event
+ timerGoids map[uint64]bool
+
+ // state for readRawEvent
+ args []uint64
+
+ // state for parseEvent
+ lastTs Timestamp
+ lastG uint64
+ // map from Ps to the last Gs that ran on them
+ lastGs map[int32]uint64
+ lastP int32
+}
+
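+// discard advances the read offset by n bytes and reports whether the skip
+// was possible. It returns false if n overflows int or would move the offset
+// past the end of the data.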
+func (p *parser) discard(n uint64) bool {
+ if n > math.MaxInt {
+ return false
+ }
+ if noff := p.off + int(n); noff < p.off || noff > len(p.data) {
+ return false
+ } else {
+ p.off = noff
+ }
+ return true
+}
+
+func newParser(r io.Reader, ver version.Version) (*parser, error) {
+ var buf []byte
+ if seeker, ok := r.(io.Seeker); ok {
+ // Determine the size of the reader so that we can allocate a buffer
+ // without having to grow it later.
+ cur, err := seeker.Seek(0, io.SeekCurrent)
+ if err != nil {
+ return nil, err
+ }
+ end, err := seeker.Seek(0, io.SeekEnd)
+ if err != nil {
+ return nil, err
+ }
+ _, err = seeker.Seek(cur, io.SeekStart)
+ if err != nil {
+ return nil, err
+ }
+
+ buf = make([]byte, end-cur)
+ _, err = io.ReadFull(r, buf)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ var err error
+ buf, err = io.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil
+}
+
+// Parse parses Go execution traces from versions 1.11–1.21. The provided reader
+// will be read to completion and the entire trace will be materialized in
+// memory. That is, this function does not allow incremental parsing.
+//
+// The reader has to be positioned just after the trace header and vers needs to
+// be the version of the trace. This can be achieved by using
+// version.ReadHeader.
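+//
+// A minimal usage sketch (error handling elided), assuming r is positioned at
+// the start of a raw trace:
+//
+//	ver, _ := version.ReadHeader(r)
+//	tr, _ := oldtrace.Parse(r, ver)
+//	for i := 0; i < tr.Events.Len(); i++ {
+//		ev := tr.Events.Ptr(i)
+//		_ = ev // inspect ev.Type, ev.Ts, ev.G, ...
+//	}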
+func Parse(r io.Reader, vers version.Version) (Trace, error) {
+ // We accept the version as an argument because internal/trace/v2 will have
+ // already read the version to determine which parser to use.
+ p, err := newParser(r, vers)
+ if err != nil {
+ return Trace{}, err
+ }
+ return p.parse()
+}
+
+// parse parses, post-processes and verifies the trace.
+func (p *parser) parse() (Trace, error) {
+ defer func() {
+ p.data = nil
+ }()
+
+ // We parse a trace by running the following steps in order:
+ //
+ // 1. In the initial pass we collect information about batches (their
+	//    locations and sizes). We also parse CPU profiling samples in this
+ // step, simply to reduce the number of full passes that we need.
+ //
+ // 2. In the second pass we parse batches and merge them into a globally
+ // ordered event stream. This uses the batch information from the first
+ // pass to quickly find batches.
+ //
+ // 3. After all events have been parsed we convert their timestamps from CPU
+ // ticks to wall time. Furthermore we move timers and syscalls to
+ // dedicated, fake Ps.
+ //
+ // 4. Finally, we validate the trace.
+
+ p.strings = make(map[uint64]string)
+ p.batchOffsets = make(map[int32][]batchOffset)
+ p.lastGs = make(map[int32]uint64)
+ p.stacks = make(map[uint32][]uint64)
+ p.pcs = make(map[uint64]Frame)
+ p.inlineStringsMapping = make(map[string]int)
+
+ if err := p.collectBatchesAndCPUSamples(); err != nil {
+ return Trace{}, err
+ }
+
+ events, err := p.parseEventBatches()
+ if err != nil {
+ return Trace{}, err
+ }
+
+ if p.ticksPerSec == 0 {
+ return Trace{}, errors.New("no EvFrequency event")
+ }
+
+ if events.Len() > 0 {
+ // Translate cpu ticks to real time.
+ minTs := events.Ptr(0).Ts
+ // Use floating point to avoid integer overflows.
+ freq := 1e9 / float64(p.ticksPerSec)
+ for i := 0; i < events.Len(); i++ {
+ ev := events.Ptr(i)
+ ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq)
+ // Move timers and syscalls to separate fake Ps.
+ if p.timerGoids[ev.G] && ev.Type == EvGoUnblock {
+ ev.P = TimerP
+ }
+ if ev.Type == EvGoSysExit {
+ ev.P = SyscallP
+ }
+ }
+ }
+
+ if err := p.postProcessTrace(events); err != nil {
+ return Trace{}, err
+ }
+
+ res := Trace{
+ Version: p.ver,
+ Events: events,
+ Stacks: p.stacks,
+ Strings: p.strings,
+ InlineStrings: p.inlineStrings,
+ PCs: p.pcs,
+ }
+ return res, nil
+}
+
+// rawEvent is a helper type used during parsing.
+type rawEvent struct {
+ typ event.Type
+ args []uint64
+ sargs []string
+
+ // if typ == EvBatch, these fields describe the batch.
+ batchPid int32
+ batchOffset int
+}
+
+type proc struct {
+ pid int32
+ // the remaining events in the current batch
+ events []Event
+ // buffer for reading batches into, aliased by proc.events
+ buf []Event
+
+ // there are no more batches left
+ done bool
+}
+
+const eventsBucketSize = 524288 // 2^19 events; at 64 bytes per Event that's 32 MiB per bucket
+
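+// Events stores a large number of Event values in fixed-size buckets,
+// supporting cheap growth and piecewise release of consumed buckets.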
+type Events struct {
+ // Events is a slice of slices that grows one slice of size eventsBucketSize
+ // at a time. This avoids the O(n) cost of slice growth in append, and
+ // additionally allows consumers to drop references to parts of the data,
+ // freeing memory piecewise.
+ n int
+ buckets []*[eventsBucketSize]Event
+ off int
+}
+
+// grow grows the slice by one and returns a pointer to the new element, without
+// overwriting it.
+func (l *Events) grow() *Event {
+ a, b := l.index(l.n)
+ if a >= len(l.buckets) {
+ l.buckets = append(l.buckets, new([eventsBucketSize]Event))
+ }
+ ptr := &l.buckets[a][b]
+ l.n++
+ return ptr
+}
+
+// append appends v to the slice and returns a pointer to the new element.
+func (l *Events) append(v Event) *Event {
+ ptr := l.grow()
+ *ptr = v
+ return ptr
+}
+
+func (l *Events) Ptr(i int) *Event {
+ a, b := l.index(i + l.off)
+ return &l.buckets[a][b]
+}
+
+func (l *Events) index(i int) (int, int) {
+ // Doing the division on uint instead of int compiles this function to a
+ // shift and an AND (for power of 2 bucket sizes), versus a whole bunch of
+ // instructions for int.
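+	//
+	// For example, with eventsBucketSize = 524288, index(600000) returns
+	// (1, 75712), because 600000 = 1*524288 + 75712.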
+ return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize)
+}
+
+func (l *Events) Len() int {
+ return l.n - l.off
+}
+
+func (l *Events) Less(i, j int) bool {
+ return l.Ptr(i).Ts < l.Ptr(j).Ts
+}
+
+func (l *Events) Swap(i, j int) {
+ *l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i)
+}
+
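+// Pop removes the first remaining event and returns a pointer to it. The
+// boolean return value reports whether an event was available. Once every
+// event in a bucket has been consumed, the bucket is dropped so that the
+// garbage collector can reclaim it.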
+func (l *Events) Pop() (*Event, bool) {
+ if l.off == l.n {
+ return nil, false
+ }
+ a, b := l.index(l.off)
+ ptr := &l.buckets[a][b]
+ l.off++
+ if b == eventsBucketSize-1 || l.off == l.n {
+ // We've consumed the last event from the bucket, so drop the bucket and
+ // allow GC to collect it.
+ l.buckets[a] = nil
+ }
+ return ptr, true
+}
+
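+// All returns an iterator over the remaining events, in order. Iteration
+// stops early if yield returns false. A usage sketch:
+//
+//	tr.Events.All()(func(ev *Event) bool {
+//		// ... process ev ...
+//		return true
+//	})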
+func (l *Events) All() func(yield func(ev *Event) bool) {
+ return func(yield func(ev *Event) bool) {
+ for i := 0; i < l.Len(); i++ {
+ a, b := l.index(i + l.off)
+ ptr := &l.buckets[a][b]
+ if !yield(ptr) {
+ return
+ }
+ }
+ }
+}
+
+// parseEventBatches reads per-P event batches and merges them into a single,
+// consistent stream. The high-level idea is as follows. Events within an
+// individual batch are in the correct order, because they are emitted by a
+// single P. So we need to produce a correct interleaving of the batches. To do
+// this we take the first unmerged event from each batch (the frontier). Then
+// we choose the subset that is "ready" to be merged, that is, events for which
+// all dependencies have already been merged. Then we pick the event with the
+// lowest timestamp from that subset, merge it, and repeat. This approach
+// ensures that we form a consistent stream even if timestamps are incorrect (a
+// condition observed on some machines).
+func (p *parser) parseEventBatches() (Events, error) {
+ // The ordering of CPU profile sample events in the data stream is based on
+ // when each run of the signal handler was able to acquire the spinlock,
+ // with original timestamps corresponding to when ReadTrace pulled the data
+ // off of the profBuf queue. Re-sort them by the timestamp we captured
+ // inside the signal handler.
+ sort.Sort((*eventList)(&p.cpuSamples))
+
+ allProcs := make([]proc, 0, len(p.batchOffsets))
+ for pid := range p.batchOffsets {
+ allProcs = append(allProcs, proc{pid: pid})
+ }
+ allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples})
+
+ events := Events{}
+
+ // Merge events as long as at least one P has more events
+ gs := make(map[uint64]gState)
+	// Note: technically we don't need a priority queue here. We're only ever
+	// interested in the earliest eligible event, which means we just have to
+	// track the smallest element. However, in practice, the priority queue
+	// performs better, because for each event we only have to compute its state
+	// transition once, not on each iteration. If it was eligible before, it'll
+	// already be in the queue. Furthermore, on average, we only have one P to
+	// look at in each iteration, because all other Ps are already in the queue.
+ var frontier orderEventList
+
+ availableProcs := make([]*proc, len(allProcs))
+ for i := range allProcs {
+ availableProcs[i] = &allProcs[i]
+ }
+ for {
+ pidLoop:
+ for i := 0; i < len(availableProcs); i++ {
+ proc := availableProcs[i]
+
+ for len(proc.events) == 0 {
+ // Call loadBatch in a loop because sometimes batches are empty
+ evs, err := p.loadBatch(proc.pid, proc.buf[:0])
+ proc.buf = evs[:0]
+ if err == io.EOF {
+ // This P has no more events
+ proc.done = true
+ availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
+ availableProcs = availableProcs[:len(availableProcs)-1]
+ // We swapped the element at i with another proc, so look at
+ // the index again
+ i--
+ continue pidLoop
+ } else if err != nil {
+ return Events{}, err
+ } else {
+ proc.events = evs
+ }
+ }
+
+ ev := &proc.events[0]
+ g, init, _ := stateTransition(ev)
+
+ // TODO(dh): This implementation matches the behavior of the
+ // upstream 'go tool trace', and works in practice, but has run into
+ // the following inconsistency during fuzzing: what happens if
+ // multiple Ps have events for the same G? While building the
+ // frontier we will check all of the events against the current
+ // state of the G. However, when we process the frontier, the state
+ // of the G changes, and a transition that was valid while building
+ // the frontier may no longer be valid when processing the frontier.
+ // Is this something that can happen for real, valid traces, or is
+ // this only possible with corrupt data?
+ if !transitionReady(g, gs[g], init) {
+ continue
+ }
+ proc.events = proc.events[1:]
+ availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
+ availableProcs = availableProcs[:len(availableProcs)-1]
+ frontier.Push(orderEvent{*ev, proc})
+
+ // We swapped the element at i with another proc, so look at the
+ // index again
+ i--
+ }
+
+ if len(frontier) == 0 {
+ for i := range allProcs {
+ if !allProcs[i].done {
+ return Events{}, fmt.Errorf("no consistent ordering of events possible")
+ }
+ }
+ break
+ }
+ f := frontier.Pop()
+
+ // We're computing the state transition twice, once when computing the
+ // frontier, and now to apply the transition. This is fine because
+ // stateTransition is a pure function. Computing it again is cheaper
+ // than storing large items in the frontier.
+ g, init, next := stateTransition(&f.ev)
+
+		// Get rid of "Local" events; they are intended merely for ordering.
+ switch f.ev.Type {
+ case EvGoStartLocal:
+ f.ev.Type = EvGoStart
+ case EvGoUnblockLocal:
+ f.ev.Type = EvGoUnblock
+ case EvGoSysExitLocal:
+ f.ev.Type = EvGoSysExit
+ }
+ events.append(f.ev)
+
+ if err := transition(gs, g, init, next); err != nil {
+ return Events{}, err
+ }
+ availableProcs = append(availableProcs, f.proc)
+ }
+
+ // At this point we have a consistent stream of events. Make sure time
+ // stamps respect the ordering. The tests will skip (not fail) the test case
+ // if they see this error.
+ if !sort.IsSorted(&events) {
+ return Events{}, ErrTimeOrder
+ }
+
+	// The last part is giving correct timestamps to EvGoSysExit events. The
+	// problem with EvGoSysExit is that the actual syscall exit timestamp
+	// (ev.Args[2]) is potentially acquired long before event emission. So far
+	// we've used the timestamp of event emission (ev.Ts). We could not set
+	// ev.Ts = ev.Args[2] earlier, because it would produce seemingly broken
+	// timestamps (misplaced events). We also can't simply update the timestamp
+	// and re-sort the events, because if timestamps are broken we will misplace
+	// the event and later report a logically broken trace (instead of reporting
+	// broken timestamps).
+ lastSysBlock := make(map[uint64]Timestamp)
+ for i := 0; i < events.Len(); i++ {
+ ev := events.Ptr(i)
+ switch ev.Type {
+ case EvGoSysBlock, EvGoInSyscall:
+ lastSysBlock[ev.G] = ev.Ts
+ case EvGoSysExit:
+ ts := Timestamp(ev.Args[2])
+ if ts == 0 {
+ continue
+ }
+ block := lastSysBlock[ev.G]
+ if block == 0 {
+ return Events{}, fmt.Errorf("stray syscall exit")
+ }
+ if ts < block {
+ return Events{}, ErrTimeOrder
+ }
+ ev.Ts = ts
+ }
+ }
+ sort.Stable(&events)
+
+ return events, nil
+}
+
+// collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples.
+func (p *parser) collectBatchesAndCPUSamples() error {
+ // Read events.
+ var raw rawEvent
+ var curP int32
+ for n := uint64(0); ; n++ {
+ err := p.readRawEvent(skipArgs|skipStrings, &raw)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return err
+ }
+ if raw.typ == EvNone {
+ continue
+ }
+
+ if raw.typ == EvBatch {
+ bo := batchOffset{offset: raw.batchOffset}
+ p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo)
+ curP = raw.batchPid
+ }
+
+ batches := p.batchOffsets[curP]
+ if len(batches) == 0 {
+ return fmt.Errorf("read event %d with current P of %d, but P has no batches yet",
+ raw.typ, curP)
+ }
+ batches[len(batches)-1].numEvents++
+
+ if raw.typ == EvCPUSample {
+ e := Event{Type: raw.typ}
+
+ argOffset := 1
+ narg := raw.argNum()
+ if len(raw.args) != narg {
+ return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args))
+ }
+ for i := argOffset; i < narg; i++ {
+ if i == narg-1 {
+ e.StkID = uint32(raw.args[i])
+ } else {
+ e.Args[i-argOffset] = raw.args[i]
+ }
+ }
+
+ e.Ts = Timestamp(e.Args[0])
+ e.P = int32(e.Args[1])
+ e.G = e.Args[2]
+ e.Args[0] = 0
+
+ // Most events are written out by the active P at the exact moment
+ // they describe. CPU profile samples are different because they're
+ // written to the tracing log after some delay, by a separate worker
+ // goroutine, into a separate buffer.
+ //
+ // We keep these in their own batch until all of the batches are
+ // merged in timestamp order. We also (right before the merge)
+ // re-sort these events by the timestamp captured in the profiling
+ // signal handler.
+ //
+ // Note that we're not concerned about the memory usage of storing
+ // all CPU samples during the indexing phase. There are orders of
+ // magnitude fewer CPU samples than runtime events.
+ p.cpuSamples = append(p.cpuSamples, e)
+ }
+ }
+
+ return nil
+}
+
+const (
+ skipArgs = 1 << iota
+ skipStrings
+)
+
+func (p *parser) readByte() (byte, bool) {
+ if p.off < len(p.data) && p.off >= 0 {
+ b := p.data[p.off]
+ p.off++
+ return b, true
+ } else {
+ return 0, false
+ }
+}
+
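+// readFull returns the next n bytes of input as a subslice of the underlying
+// data (no copy is made) and advances the read offset. If fewer than n bytes
+// remain, it fails with io.ErrUnexpectedEOF rather than returning a partial
+// result.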
+func (p *parser) readFull(n int) ([]byte, error) {
+ if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) {
+ // p.off < 0 is impossible but makes BCE happy.
+ //
+ // We do fail outright if there's not enough data, we don't care about
+ // partial results.
+ return nil, io.ErrUnexpectedEOF
+ }
+ buf := p.data[p.off : p.off+n]
+ p.off += n
+ return buf, nil
+}
+
+// readRawEvent reads a raw event into ev. The slices in ev alias buffers owned
+// by the parser and are only valid until the next call to readRawEvent, even
+// if ev is stored to a different location.
+func (p *parser) readRawEvent(flags uint, ev *rawEvent) error {
+ // The number of arguments is encoded using two bits and can thus only
+ // represent the values 0–3. The value 3 (on the wire) indicates that
+ // arguments are prefixed by their byte length, to encode >=3 arguments.
+ const inlineArgs = 3
+
+ // Read event type and number of arguments (1 byte).
+ b, ok := p.readByte()
+ if !ok {
+ return io.EOF
+ }
+ typ := event.Type(b << 2 >> 2)
+	// Most events have a timestamp before the actual arguments, so we add 1 and
+	// parse it like it's the first argument. EvString has a special format and
+	// the number of arguments doesn't matter. EvBatch encodes '1' as its number
+	// of arguments, but actually has two, a pid and a timestamp; because the
+	// timestamp is the second argument rather than the first, adding 1 happens
+	// to produce the correct count. Either way it doesn't matter, because
+	// EvBatch has custom parsing logic.
+	//
+	// Note that because we're adding 1, inlineArgs == 3 describes the largest
+	// number of logical arguments that isn't length-prefixed, even though the
+	// value 3 on the wire indicates length-prefixing. For us, that becomes narg
+	// == 4.
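+	//
+	// For example, a header byte of 0x45 (0b01_000101) encodes typ = 0b000101
+	// = 5 (EvProcStart) in its low six bits and 0b01 in its top two bits,
+	// yielding narg = 1+1 = 2: the timestamp plus EvProcStart's single
+	// "thread" argument.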
+ narg := b>>6 + 1
+ if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver {
+ return fmt.Errorf("unknown event type %d", typ)
+ }
+
+ switch typ {
+ case EvString:
+ if flags&skipStrings != 0 {
+ // String dictionary entry [ID, length, string].
+ if _, err := p.readVal(); err != nil {
+ return errMalformedVarint
+ }
+ ln, err := p.readVal()
+ if err != nil {
+ return err
+ }
+ if !p.discard(ln) {
+ return fmt.Errorf("failed to read trace: %w", io.EOF)
+ }
+ } else {
+ // String dictionary entry [ID, length, string].
+ id, err := p.readVal()
+ if err != nil {
+ return err
+ }
+ if id == 0 {
+ return errors.New("string has invalid id 0")
+ }
+ if p.strings[id] != "" {
+ return fmt.Errorf("string has duplicate id %d", id)
+ }
+ var ln uint64
+ ln, err = p.readVal()
+ if err != nil {
+ return err
+ }
+ if ln == 0 {
+ return errors.New("string has invalid length 0")
+ }
+ if ln > 1e6 {
+ return fmt.Errorf("string has too large length %d", ln)
+ }
+ buf, err := p.readFull(int(ln))
+ if err != nil {
+ return fmt.Errorf("failed to read trace: %w", err)
+ }
+ p.strings[id] = string(buf)
+ }
+
+ ev.typ = EvNone
+ return nil
+ case EvBatch:
+ if want := byte(2); narg != want {
+ return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want)
+ }
+
+ // -1 because we've already read the first byte of the batch
+ off := p.off - 1
+
+ pid, err := p.readVal()
+ if err != nil {
+ return err
+ }
+		if pid != math.MaxUint64 && pid > math.MaxInt32 {
+			return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxInt32))
+		}
+
+ var pid32 int32
+ if pid == math.MaxUint64 {
+ pid32 = -1
+ } else {
+ pid32 = int32(pid)
+ }
+
+ v, err := p.readVal()
+ if err != nil {
+ return err
+ }
+
+ *ev = rawEvent{
+ typ: EvBatch,
+ args: p.args[:0],
+ batchPid: pid32,
+ batchOffset: off,
+ }
+ ev.args = append(ev.args, pid, v)
+ return nil
+ default:
+ *ev = rawEvent{typ: typ, args: p.args[:0]}
+ if narg <= inlineArgs {
+ if flags&skipArgs == 0 {
+ for i := 0; i < int(narg); i++ {
+ v, err := p.readVal()
+ if err != nil {
+ return fmt.Errorf("failed to read event %d argument: %w", typ, err)
+ }
+ ev.args = append(ev.args, v)
+ }
+ } else {
+ for i := 0; i < int(narg); i++ {
+ if _, err := p.readVal(); err != nil {
+ return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint)
+ }
+ }
+ }
+ } else {
+			// More than inlineArgs args; the first value is the length of the
+			// event in bytes.
+ v, err := p.readVal()
+ if err != nil {
+ return fmt.Errorf("failed to read event %d argument: %w", typ, err)
+ }
+
+ if limit := uint64(2048); v > limit {
+ // At the time of Go 1.19, v seems to be at most 128. Set 2048
+ // as a generous upper limit and guard against malformed traces.
+ return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit)
+ }
+
+ if flags&skipArgs == 0 || typ == EvCPUSample {
+ buf, err := p.readFull(int(v))
+ if err != nil {
+ return fmt.Errorf("failed to read trace: %w", err)
+ }
+ for len(buf) > 0 {
+ var v uint64
+ v, buf, err = readValFrom(buf)
+ if err != nil {
+ return err
+ }
+ ev.args = append(ev.args, v)
+ }
+ } else {
+ // Skip over arguments
+ if !p.discard(v) {
+ return fmt.Errorf("failed to read trace: %w", io.EOF)
+ }
+ }
+ if typ == EvUserLog {
+ // EvUserLog records are followed by a value string
+ if flags&skipArgs == 0 {
+ // Read string
+ s, err := p.readStr()
+ if err != nil {
+ return err
+ }
+ ev.sargs = append(ev.sargs, s)
+ } else {
+ // Skip string
+ v, err := p.readVal()
+ if err != nil {
+ return err
+ }
+ if !p.discard(v) {
+ return io.EOF
+ }
+ }
+ }
+ }
+
+ p.args = ev.args[:0]
+ return nil
+ }
+}
+
+// loadBatch loads the next batch for pid and appends its contents to events.
+func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) {
+ offsets := p.batchOffsets[pid]
+ if len(offsets) == 0 {
+ return nil, io.EOF
+ }
+ n := offsets[0].numEvents
+ offset := offsets[0].offset
+ offsets = offsets[1:]
+ p.batchOffsets[pid] = offsets
+
+ p.off = offset
+
+ if cap(events) < n {
+ events = make([]Event, 0, n)
+ }
+
+ gotHeader := false
+ var raw rawEvent
+ var ev Event
+ for {
+ err := p.readRawEvent(0, &raw)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ if raw.typ == EvNone || raw.typ == EvCPUSample {
+ continue
+ }
+ if raw.typ == EvBatch {
+ if gotHeader {
+ break
+ } else {
+ gotHeader = true
+ }
+ }
+
+ err = p.parseEvent(&raw, &ev)
+ if err != nil {
+ return nil, err
+ }
+ if ev.Type != EvNone {
+ events = append(events, ev)
+ }
+ }
+
+ return events, nil
+}
+
+func (p *parser) readStr() (s string, err error) {
+ sz, err := p.readVal()
+ if err != nil {
+ return "", err
+ }
+ if sz == 0 {
+ return "", nil
+ }
+ if sz > 1e6 {
+ return "", fmt.Errorf("string is too large (len=%d)", sz)
+ }
+ buf, err := p.readFull(int(sz))
+ if err != nil {
+ return "", fmt.Errorf("failed to read trace: %w", err)
+ }
+ return string(buf), nil
+}
+
+// parseEvent transforms raw events into events. It also analyzes and verifies
+// per-event-type arguments.
+func (p *parser) parseEvent(raw *rawEvent, ev *Event) error {
+ desc := &EventDescriptions[raw.typ]
+ if desc.Name == "" {
+ return fmt.Errorf("missing description for event type %d", raw.typ)
+ }
+ narg := raw.argNum()
+ if len(raw.args) != narg {
+ return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args))
+ }
+ switch raw.typ {
+ case EvBatch:
+ p.lastGs[p.lastP] = p.lastG
+ if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 {
+ return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32))
+ }
+ if raw.args[0] == math.MaxUint64 {
+ p.lastP = -1
+ } else {
+ p.lastP = int32(raw.args[0])
+ }
+ p.lastG = p.lastGs[p.lastP]
+ p.lastTs = Timestamp(raw.args[1])
+ case EvFrequency:
+ p.ticksPerSec = int64(raw.args[0])
+ if p.ticksPerSec <= 0 {
+ // The most likely cause for this is tick skew on different CPUs.
+ // For example, solaris/amd64 seems to have wildly different
+ // ticks on different CPUs.
+ return ErrTimeOrder
+ }
+ case EvTimerGoroutine:
+ p.timerGoids[raw.args[0]] = true
+ case EvStack:
+ if len(raw.args) < 2 {
+ return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args))
+ }
+ size := raw.args[1]
+ if size > 1000 {
+ return fmt.Errorf("EvStack has bad number of frames: %d", size)
+ }
+ want := 2 + 4*size
+ if uint64(len(raw.args)) != want {
+ return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args))
+ }
+ id := uint32(raw.args[0])
+ if id != 0 && size > 0 {
+ stk := p.allocateStack(size)
+ for i := 0; i < int(size); i++ {
+ pc := raw.args[2+i*4+0]
+ fn := raw.args[2+i*4+1]
+ file := raw.args[2+i*4+2]
+ line := raw.args[2+i*4+3]
+ stk[i] = pc
+
+ if _, ok := p.pcs[pc]; !ok {
+ p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)}
+ }
+ }
+ p.stacks[id] = stk
+ }
+ case EvCPUSample:
+ // These events get parsed during the indexing step and don't strictly
+ // belong to the batch.
+ default:
+ *ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG}
+ var argOffset int
+ ev.Ts = p.lastTs + Timestamp(raw.args[0])
+ argOffset = 1
+ p.lastTs = ev.Ts
+ for i := argOffset; i < narg; i++ {
+ if i == narg-1 && desc.Stack {
+ ev.StkID = uint32(raw.args[i])
+ } else {
+ ev.Args[i-argOffset] = raw.args[i]
+ }
+ }
+ switch raw.typ {
+ case EvGoStart, EvGoStartLocal, EvGoStartLabel:
+ p.lastG = ev.Args[0]
+ ev.G = p.lastG
+ case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
+ EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+ EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
+ EvGoSysBlock, EvGoBlockGC:
+ p.lastG = 0
+ case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+ ev.G = ev.Args[0]
+ case EvUserTaskCreate:
+ // e.Args 0: taskID, 1:parentID, 2:nameID
+ case EvUserRegion:
+ // e.Args 0: taskID, 1: mode, 2:nameID
+ case EvUserLog:
+ // e.Args 0: taskID, 1:keyID, 2: stackID, 3: messageID
+ // raw.sargs 0: message
+
+ if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok {
+ ev.Args[3] = uint64(id)
+ } else {
+ id := len(p.inlineStrings)
+ p.inlineStringsMapping[raw.sargs[0]] = id
+ p.inlineStrings = append(p.inlineStrings, raw.sargs[0])
+ ev.Args[3] = uint64(id)
+ }
+ }
+
+ return nil
+ }
+
+ ev.Type = EvNone
+ return nil
+}
+
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+var ErrTimeOrder = errors.New("time stamps out of order")
+
+// postProcessTrace does inter-event verification and information restoration.
+// The resulting trace is guaranteed to be consistent
+// (for example, a P does not run two Gs at the same time, or a G is indeed
+// blocked before an unblock event).
+func (p *parser) postProcessTrace(events Events) error {
+ const (
+ gDead = iota
+ gRunnable
+ gRunning
+ gWaiting
+ )
+ type gdesc struct {
+ state int
+ ev *Event
+ evStart *Event
+ evCreate *Event
+ evMarkAssist *Event
+ }
+ type pdesc struct {
+ running bool
+ g uint64
+ evSweep *Event
+ }
+
+ gs := make(map[uint64]gdesc)
+ ps := make(map[int32]pdesc)
+ tasks := make(map[uint64]*Event) // task id to task creation events
+ activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
+ gs[0] = gdesc{state: gRunning}
+ var evGC, evSTW *Event
+
+ checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
+ name := EventDescriptions[ev.Type].Name
+ if g.state != gRunning {
+ return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts)
+ }
+ if p.g != ev.G {
+ return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts)
+ }
+ if !allowG0 && ev.G == 0 {
+ return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts)
+ }
+ return nil
+ }
+
+ for evIdx := 0; evIdx < events.Len(); evIdx++ {
+ ev := events.Ptr(evIdx)
+
+ switch ev.Type {
+ case EvProcStart:
+ p := ps[ev.P]
+ if p.running {
+ return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts)
+ }
+ p.running = true
+
+ ps[ev.P] = p
+ case EvProcStop:
+ p := ps[ev.P]
+ if !p.running {
+ return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts)
+ }
+ if p.g != 0 {
+ return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts)
+ }
+ p.running = false
+
+ ps[ev.P] = p
+ case EvGCStart:
+ if evGC != nil {
+ return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts)
+ }
+ evGC = ev
+ // Attribute this to the global GC state.
+ ev.P = GCP
+ case EvGCDone:
+ if evGC == nil {
+ return fmt.Errorf("bogus GC end (time %d)", ev.Ts)
+ }
+ evGC = nil
+ case EvSTWStart:
+ evp := &evSTW
+ if *evp != nil {
+ return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts)
+ }
+ *evp = ev
+ case EvSTWDone:
+ evp := &evSTW
+ if *evp == nil {
+ return fmt.Errorf("bogus STW end (time %d)", ev.Ts)
+ }
+ *evp = nil
+ case EvGCSweepStart:
+ p := ps[ev.P]
+ if p.evSweep != nil {
+ return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts)
+ }
+ p.evSweep = ev
+
+ ps[ev.P] = p
+ case EvGCMarkAssistStart:
+ g := gs[ev.G]
+ if g.evMarkAssist != nil {
+ return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts)
+ }
+ g.evMarkAssist = ev
+
+ gs[ev.G] = g
+ case EvGCMarkAssistDone:
+			// Unlike most events, mark assists can already be in progress when
+			// tracing starts, so we can't report an error here.
+ g := gs[ev.G]
+ if g.evMarkAssist != nil {
+ g.evMarkAssist = nil
+ }
+
+ gs[ev.G] = g
+ case EvGCSweepDone:
+ p := ps[ev.P]
+ if p.evSweep == nil {
+ return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts)
+ }
+ p.evSweep = nil
+
+ ps[ev.P] = p
+ case EvGoWaiting:
+ g := gs[ev.G]
+ if g.state != gRunnable {
+ return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts)
+ }
+ g.state = gWaiting
+ g.ev = ev
+
+ gs[ev.G] = g
+ case EvGoInSyscall:
+ g := gs[ev.G]
+ if g.state != gRunnable {
+ return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts)
+ }
+ g.state = gWaiting
+ g.ev = ev
+
+ gs[ev.G] = g
+ case EvGoCreate:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, true); err != nil {
+ return err
+ }
+ if _, ok := gs[ev.Args[0]]; ok {
+ return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts)
+ }
+ gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}
+
+ case EvGoStart, EvGoStartLabel:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if g.state != gRunnable {
+ return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts)
+ }
+ if p.g != 0 {
+ return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts)
+ }
+ g.state = gRunning
+ g.evStart = ev
+ p.g = ev.G
+ if g.evCreate != nil {
+ ev.StkID = uint32(g.evCreate.Args[1])
+ g.evCreate = nil
+ }
+
+ if g.ev != nil {
+ g.ev = nil
+ }
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ case EvGoEnd, EvGoStop:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.evStart = nil
+ g.state = gDead
+ p.g = 0
+
+ if ev.Type == EvGoEnd { // flush all active regions
+ delete(activeRegions, ev.G)
+ }
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ case EvGoSched, EvGoPreempt:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gRunnable
+ g.evStart = nil
+ p.g = 0
+ g.ev = ev
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ case EvGoUnblock:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if g.state != gRunning {
+ return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts)
+ }
+ if ev.P != TimerP && p.g != ev.G {
+ return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts)
+ }
+ g1 := gs[ev.Args[0]]
+ if g1.state != gWaiting {
+ return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts)
+ }
+ if g1.ev != nil && g1.ev.Type == EvGoBlockNet {
+ ev.P = NetpollP
+ }
+ g1.state = gRunnable
+ g1.ev = ev
+ gs[ev.Args[0]] = g1
+
+ case EvGoSysCall:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.ev = ev
+
+ gs[ev.G] = g
+ case EvGoSysBlock:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gWaiting
+ g.evStart = nil
+ p.g = 0
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ case EvGoSysExit:
+ g := gs[ev.G]
+ if g.state != gWaiting {
+ return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts)
+ }
+ g.state = gRunnable
+ g.ev = ev
+
+ gs[ev.G] = g
+ case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+ EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
+ g := gs[ev.G]
+ p := ps[ev.P]
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gWaiting
+ g.ev = ev
+ g.evStart = nil
+ p.g = 0
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ case EvUserTaskCreate:
+ taskid := ev.Args[0]
+ if prevEv, ok := tasks[taskid]; ok {
+ return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
+ }
+ tasks[ev.Args[0]] = ev
+
+ case EvUserTaskEnd:
+ taskid := ev.Args[0]
+ delete(tasks, taskid)
+
+ case EvUserRegion:
+ mode := ev.Args[1]
+ regions := activeRegions[ev.G]
+ if mode == 0 { // region start
+ activeRegions[ev.G] = append(regions, ev) // push
+ } else if mode == 1 { // region end
+ n := len(regions)
+ if n > 0 { // matching region start event is in the trace.
+ s := regions[n-1]
+ if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch
+ return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
+ }
+
+ if n > 1 {
+ activeRegions[ev.G] = regions[:n-1]
+ } else {
+ delete(activeRegions, ev.G)
+ }
+ }
+ } else {
+ return fmt.Errorf("invalid user region mode: %q", ev)
+ }
+ }
+
+ if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 {
+			// Make sure events don't refer to stacks that don't exist or to
+			// stacks with zero frames. Neither of these should be possible, but
+			// better safe than sorry.
+
+ ev.StkID = 0
+ }
+
+ }
+
+ // TODO(mknyszek): restore stacks for EvGoStart events.
+ return nil
+}
+
+var errMalformedVarint = errors.New("malformed base-128 varint")
+
+// readVal reads an unsigned base-128 varint from the parser's data.
+func (p *parser) readVal() (uint64, error) {
+ v, n := binary.Uvarint(p.data[p.off:])
+ if n <= 0 {
+ return 0, errMalformedVarint
+ }
+ p.off += n
+ return v, nil
+}
+
+func readValFrom(buf []byte) (v uint64, rem []byte, err error) {
+ v, n := binary.Uvarint(buf)
+ if n <= 0 {
+ return 0, nil, errMalformedVarint
+ }
+ return v, buf[n:], nil
+}
+
+func (ev *Event) String() string {
+ desc := &EventDescriptions[ev.Type]
+ w := new(bytes.Buffer)
+ fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID)
+ for i, a := range desc.Args {
+ fmt.Fprintf(w, " %s=%d", a, ev.Args[i])
+ }
+ return w.String()
+}
+
+// argNum returns the total number of args for the event, accounting for
+// timestamps, sequence numbers, and differences between trace format versions.
+func (raw *rawEvent) argNum() int {
+ desc := &EventDescriptions[raw.typ]
+ if raw.typ == EvStack {
+ return len(raw.args)
+ }
+ narg := len(desc.Args)
+ if desc.Stack {
+ narg++
+ }
+ switch raw.typ {
+ case EvBatch, EvFrequency, EvTimerGoroutine:
+ return narg
+ }
+ narg++ // timestamp
+ return narg
+}
+
+// Event types in the trace.
+// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
+const (
+ EvNone event.Type = 0 // unused
+ EvBatch event.Type = 1 // start of per-P batch of events [pid, timestamp]
+ EvFrequency event.Type = 2 // contains tracer timer frequency [frequency (ticks per second)]
+ EvStack event.Type = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
+ EvGomaxprocs event.Type = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
+ EvProcStart event.Type = 5 // start of P [timestamp, thread id]
+ EvProcStop event.Type = 6 // stop of P [timestamp]
+ EvGCStart event.Type = 7 // GC start [timestamp, seq, stack id]
+ EvGCDone event.Type = 8 // GC done [timestamp]
+ EvSTWStart event.Type = 9 // GC mark termination start [timestamp, kind]
+ EvSTWDone event.Type = 10 // GC mark termination done [timestamp]
+ EvGCSweepStart event.Type = 11 // GC sweep start [timestamp, stack id]
+ EvGCSweepDone event.Type = 12 // GC sweep done [timestamp, swept, reclaimed]
+ EvGoCreate event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
+ EvGoStart event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq]
+ EvGoEnd event.Type = 15 // goroutine ends [timestamp]
+ EvGoStop event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack]
+ EvGoSched event.Type = 17 // goroutine calls Gosched [timestamp, stack]
+ EvGoPreempt event.Type = 18 // goroutine is preempted [timestamp, stack]
+ EvGoSleep event.Type = 19 // goroutine calls Sleep [timestamp, stack]
+ EvGoBlock event.Type = 20 // goroutine blocks [timestamp, stack]
+ EvGoUnblock event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
+ EvGoBlockSend event.Type = 22 // goroutine blocks on chan send [timestamp, stack]
+ EvGoBlockRecv event.Type = 23 // goroutine blocks on chan recv [timestamp, stack]
+ EvGoBlockSelect event.Type = 24 // goroutine blocks on select [timestamp, stack]
+ EvGoBlockSync event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
+ EvGoBlockCond event.Type = 26 // goroutine blocks on Cond [timestamp, stack]
+ EvGoBlockNet event.Type = 27 // goroutine blocks on network [timestamp, stack]
+ EvGoSysCall event.Type = 28 // syscall enter [timestamp, stack]
+ EvGoSysExit event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
+ EvGoSysBlock event.Type = 30 // syscall blocks [timestamp]
+ EvGoWaiting event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+ EvGoInSyscall event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
+ EvHeapAlloc event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes]
+ EvHeapGoal event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
+ EvTimerGoroutine event.Type = 35 // denotes timer goroutine [timer goroutine id]
+ EvFutileWakeup event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
+ EvString event.Type = 37 // string dictionary entry [ID, length, string]
+ EvGoStartLocal event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+ EvGoUnblockLocal event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+ EvGoSysExitLocal event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+ EvGoStartLabel event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
+ EvGoBlockGC event.Type = 42 // goroutine blocks on GC assist [timestamp, stack]
+ EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack]
+ EvGCMarkAssistDone event.Type = 44 // GC mark assist done [timestamp]
+ EvUserTaskCreate event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string]
+ EvUserTaskEnd event.Type = 46 // end of task [timestamp, internal task id, stack]
+ EvUserRegion event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string]
+ EvUserLog event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
+ EvCPUSample event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
+ EvCount event.Type = 50
+)
+
+var EventDescriptions = [256]struct {
+ Name string
+ minVersion version.Version
+ Stack bool
+ Args []string
+ SArgs []string // string arguments
+}{
+ EvNone: {"None", 5, false, []string{}, nil},
+ EvBatch: {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
+ EvFrequency: {"Frequency", 5, false, []string{"freq"}, nil}, // in 1.5 format it was {"freq", "unused"}
+ EvStack: {"Stack", 5, false, []string{"id", "siz"}, nil},
+ EvGomaxprocs: {"Gomaxprocs", 5, true, []string{"procs"}, nil},
+ EvProcStart: {"ProcStart", 5, false, []string{"thread"}, nil},
+ EvProcStop: {"ProcStop", 5, false, []string{}, nil},
+ EvGCStart: {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {}
+ EvGCDone: {"GCDone", 5, false, []string{}, nil},
+ EvSTWStart: {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
+ EvSTWDone: {"GCSTWDone", 5, false, []string{}, nil},
+ EvGCSweepStart: {"GCSweepStart", 5, true, []string{}, nil},
+ EvGCSweepDone: {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
+ EvGoCreate: {"GoCreate", 5, true, []string{"g", "stack"}, nil},
+ EvGoStart: {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+ EvGoEnd: {"GoEnd", 5, false, []string{}, nil},
+ EvGoStop: {"GoStop", 5, true, []string{}, nil},
+ EvGoSched: {"GoSched", 5, true, []string{}, nil},
+ EvGoPreempt: {"GoPreempt", 5, true, []string{}, nil},
+ EvGoSleep: {"GoSleep", 5, true, []string{}, nil},
+ EvGoBlock: {"GoBlock", 5, true, []string{}, nil},
+ EvGoUnblock: {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+ EvGoBlockSend: {"GoBlockSend", 5, true, []string{}, nil},
+ EvGoBlockRecv: {"GoBlockRecv", 5, true, []string{}, nil},
+ EvGoBlockSelect: {"GoBlockSelect", 5, true, []string{}, nil},
+ EvGoBlockSync: {"GoBlockSync", 5, true, []string{}, nil},
+ EvGoBlockCond: {"GoBlockCond", 5, true, []string{}, nil},
+ EvGoBlockNet: {"GoBlockNet", 5, true, []string{}, nil},
+ EvGoSysCall: {"GoSysCall", 5, true, []string{}, nil},
+ EvGoSysExit: {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil},
+ EvGoSysBlock: {"GoSysBlock", 5, false, []string{}, nil},
+ EvGoWaiting: {"GoWaiting", 5, false, []string{"g"}, nil},
+ EvGoInSyscall: {"GoInSyscall", 5, false, []string{"g"}, nil},
+ EvHeapAlloc: {"HeapAlloc", 5, false, []string{"mem"}, nil},
+ EvHeapGoal: {"HeapGoal", 5, false, []string{"mem"}, nil},
+ EvTimerGoroutine: {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
+ EvFutileWakeup: {"FutileWakeup", 5, false, []string{}, nil},
+ EvString: {"String", 7, false, []string{}, nil},
+ EvGoStartLocal: {"GoStartLocal", 7, false, []string{"g"}, nil},
+ EvGoUnblockLocal: {"GoUnblockLocal", 7, true, []string{"g"}, nil},
+ EvGoSysExitLocal: {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil},
+ EvGoStartLabel: {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}},
+ EvGoBlockGC: {"GoBlockGC", 8, true, []string{}, nil},
+ EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil},
+ EvGCMarkAssistDone: {"GCMarkAssistDone", 9, false, []string{}, nil},
+ EvUserTaskCreate: {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
+ EvUserTaskEnd: {"UserTaskEnd", 11, true, []string{"taskid"}, nil},
+ EvUserRegion: {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
+ EvUserLog: {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}},
+ EvCPUSample: {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil},
+}
+
+//gcassert:inline
+func (p *parser) allocateStack(size uint64) []uint64 {
+ if size == 0 {
+ return nil
+ }
+
+ // Stacks are plentiful but small. For our "Staticcheck on std" trace with
+ // 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To
+ // avoid making 500,000 small allocations we allocate backing arrays 1 MiB
+ // at a time.
+ out := p.stacksData
+ if uint64(len(out)) < size {
+ out = make([]uint64, 1024*128)
+ }
+ p.stacksData = out[size:]
+ return out[:size:size]
+}
+
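+// STWReason translates the kindid argument of an EvSTWStart event into an
+// STWReason. Traces from before Go 1.21 only distinguish mark termination
+// (kindID 0) and sweep termination (kindID 1); Go 1.21 traces encode
+// STWReason values directly.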
+func (tr *Trace) STWReason(kindID uint64) STWReason {
+ if tr.Version < 21 {
+ if kindID == 0 || kindID == 1 {
+ return STWReason(kindID + 1)
+ } else {
+ return STWUnknown
+ }
+ } else if tr.Version == 21 {
+ if kindID < NumSTWReasons {
+ return STWReason(kindID)
+ } else {
+ return STWUnknown
+ }
+ } else {
+ return STWUnknown
+ }
+}
+
+type STWReason int
+
+const (
+ STWUnknown STWReason = 0
+ STWGCMarkTermination STWReason = 1
+ STWGCSweepTermination STWReason = 2
+ STWWriteHeapDump STWReason = 3
+ STWGoroutineProfile STWReason = 4
+ STWGoroutineProfileCleanup STWReason = 5
+ STWAllGoroutinesStackTrace STWReason = 6
+ STWReadMemStats STWReason = 7
+ STWAllThreadsSyscall STWReason = 8
+ STWGOMAXPROCS STWReason = 9
+ STWStartTrace STWReason = 10
+ STWStopTrace STWReason = 11
+ STWCountPagesInUse STWReason = 12
+ STWReadMetricsSlow STWReason = 13
+ STWReadMemStatsSlow STWReason = 14
+ STWPageCachePagesLeaked STWReason = 15
+ STWResetDebugLog STWReason = 16
+
+ NumSTWReasons = 17
+)
--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements conversion from old (Go 1.11–Go 1.21) traces to the Go
+// 1.22 format.
+//
+// Most events have direct equivalents in 1.22, at worst requiring arguments to
+// be reordered. Some events, such as GoWaiting, need to look ahead for follow-up
+// events to determine the correct translation. GoSyscall, which is an
+// instantaneous event, gets turned into a 1 ns long pair of
+// GoSyscallStart+GoSyscallEnd, unless we observe a GoSysBlock, in which case we
+// emit a GoSyscallStart+GoSyscallEndBlocked pair with the correct duration
+// (i.e. starting at the original GoSyscall).
+//
+// The resulting trace treats the old trace as a single, large generation,
+// sharing a single evTable for all events.
+//
+// We use a new parser for old traces (compared to the one used by 'go tool
+// trace' in earlier versions of Go) that is optimized for speed, low memory
+// usage, and minimal GC pressure. It allocates events in batches so that even
+// though we have to load the entire trace into memory, the conversion process
+// shouldn't result in a doubling of memory usage, even if all converted events
+// are kept alive, as we free batches once we're done with them.
+//
+// The conversion process is lossless.
+
+package trace
+
+import (
+ "errors"
+ "fmt"
+ "internal/trace/v2/event"
+ "internal/trace/v2/event/go122"
+ "internal/trace/v2/internal/oldtrace"
+ "io"
+)
+
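+// oldTraceConverter incrementally converts the events of a parsed old-format
+// trace to the Go 1.22 representation.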
+type oldTraceConverter struct {
+ trace oldtrace.Trace
+ evt *evTable
+ preInit bool
+ createdPreInit map[GoID]struct{}
+ events oldtrace.Events
+ extra []Event
+ extraArr [3]Event
+ tasks map[TaskID]taskState
+ seenProcs map[ProcID]struct{}
+ lastTs Time
+ procMs map[ProcID]ThreadID
+ lastStwReason uint64
+
+ inlineToStringID []uint64
+ builtinToStringID []uint64
+}
+
+const (
+ // Block reasons
+ sForever = iota
+ sPreempted
+ sGosched
+ sSleep
+ sChanSend
+ sChanRecv
+ sNetwork
+ sSync
+ sSyncCond
+ sSelect
+ sEmpty
+ sMarkAssistWait
+
+ // STW kinds
+ sSTWUnknown
+ sSTWGCMarkTermination
+ sSTWGCSweepTermination
+ sSTWWriteHeapDump
+ sSTWGoroutineProfile
+ sSTWGoroutineProfileCleanup
+ sSTWAllGoroutinesStackTrace
+ sSTWReadMemStats
+ sSTWAllThreadsSyscall
+ sSTWGOMAXPROCS
+ sSTWStartTrace
+ sSTWStopTrace
+ sSTWCountPagesInUse
+ sSTWReadMetricsSlow
+ sSTWReadMemStatsSlow
+ sSTWPageCachePagesLeaked
+ sSTWResetDebugLog
+
+ sLast
+)
+
+func (it *oldTraceConverter) init(pr oldtrace.Trace) error {
+ it.trace = pr
+ it.preInit = true
+ it.createdPreInit = make(map[GoID]struct{})
+ it.evt = &evTable{pcs: make(map[uint64]frame)}
+ it.events = pr.Events
+ it.extra = it.extraArr[:0]
+ it.tasks = make(map[TaskID]taskState)
+ it.seenProcs = make(map[ProcID]struct{})
+ it.procMs = make(map[ProcID]ThreadID)
+ it.lastTs = -1
+
+ evt := it.evt
+
+	// Convert from oldtrace's Strings map to our dataTable.
+ var max uint64
+ for id, s := range pr.Strings {
+ evt.strings.insert(stringID(id), s)
+ if id > max {
+ max = id
+ }
+ }
+ pr.Strings = nil
+
+ // Add all strings used for UserLog. In the old trace format, these were
+ // stored inline and didn't have IDs. We generate IDs for them.
+ if max+uint64(len(pr.InlineStrings)) < max {
+ return errors.New("trace contains too many strings")
+ }
+ var addErr error
+ add := func(id stringID, s string) {
+ if err := evt.strings.insert(id, s); err != nil && addErr == nil {
+ addErr = err
+ }
+ }
+ for id, s := range pr.InlineStrings {
+ nid := max + 1 + uint64(id)
+ it.inlineToStringID = append(it.inlineToStringID, nid)
+ add(stringID(nid), s)
+ }
+ max += uint64(len(pr.InlineStrings))
+ pr.InlineStrings = nil
+
+ // Add strings that the converter emits explicitly.
+ if max+uint64(sLast) < max {
+ return errors.New("trace contains too many strings")
+ }
+ it.builtinToStringID = make([]uint64, sLast)
+ addBuiltin := func(c int, s string) {
+ nid := max + 1 + uint64(c)
+ it.builtinToStringID[c] = nid
+ add(stringID(nid), s)
+ }
+ addBuiltin(sForever, "forever")
+ addBuiltin(sPreempted, "preempted")
+ addBuiltin(sGosched, "runtime.Gosched")
+ addBuiltin(sSleep, "sleep")
+ addBuiltin(sChanSend, "chan send")
+ addBuiltin(sChanRecv, "chan receive")
+ addBuiltin(sNetwork, "network")
+ addBuiltin(sSync, "sync")
+ addBuiltin(sSyncCond, "sync.(*Cond).Wait")
+ addBuiltin(sSelect, "select")
+ addBuiltin(sEmpty, "")
+ addBuiltin(sMarkAssistWait, "GC mark assist wait for work")
+ addBuiltin(sSTWUnknown, "")
+ addBuiltin(sSTWGCMarkTermination, "GC mark termination")
+ addBuiltin(sSTWGCSweepTermination, "GC sweep termination")
+ addBuiltin(sSTWWriteHeapDump, "write heap dump")
+ addBuiltin(sSTWGoroutineProfile, "goroutine profile")
+ addBuiltin(sSTWGoroutineProfileCleanup, "goroutine profile cleanup")
+ addBuiltin(sSTWAllGoroutinesStackTrace, "all goroutine stack trace")
+ addBuiltin(sSTWReadMemStats, "read mem stats")
+ addBuiltin(sSTWAllThreadsSyscall, "AllThreadsSyscall")
+ addBuiltin(sSTWGOMAXPROCS, "GOMAXPROCS")
+ addBuiltin(sSTWStartTrace, "start trace")
+ addBuiltin(sSTWStopTrace, "stop trace")
+ addBuiltin(sSTWCountPagesInUse, "CountPagesInUse (test)")
+ addBuiltin(sSTWReadMetricsSlow, "ReadMetricsSlow (test)")
+ addBuiltin(sSTWReadMemStatsSlow, "ReadMemStatsSlow (test)")
+ addBuiltin(sSTWPageCachePagesLeaked, "PageCachePagesLeaked (test)")
+ addBuiltin(sSTWResetDebugLog, "ResetDebugLog (test)")
+
+ if addErr != nil {
+ // This should be impossible but let's be safe.
+ return fmt.Errorf("couldn't add strings: %w", addErr)
+ }
+
+ it.evt.strings.compactify()
+
+ // Convert stacks.
+ for id, stk := range pr.Stacks {
+ evt.stacks.insert(stackID(id), stack{pcs: stk})
+ }
+
+ // OPT(dh): if we could share the frame type between this package and
+ // oldtrace we wouldn't have to copy the map.
+ for pc, f := range pr.PCs {
+ evt.pcs[pc] = frame{
+ pc: pc,
+ funcID: stringID(f.Fn),
+ fileID: stringID(f.File),
+ line: uint64(f.Line),
+ }
+ }
+ pr.Stacks = nil
+ pr.PCs = nil
+ evt.stacks.compactify()
+ return nil
+}
+
+// next returns the next event, io.EOF if there are no more events, or a
+// descriptive error for invalid events.
+func (it *oldTraceConverter) next() (Event, error) {
+ if len(it.extra) > 0 {
+ ev := it.extra[0]
+ it.extra = it.extra[1:]
+
+ if len(it.extra) == 0 {
+ it.extra = it.extraArr[:0]
+ }
+		// Two events aren't allowed to fall on the same timestamp in the new
+		// API, but this may happen when we produce EvGoStatus events.
+ if ev.base.time <= it.lastTs {
+ ev.base.time = it.lastTs + 1
+ }
+ it.lastTs = ev.base.time
+ return ev, nil
+ }
+
+ oev, ok := it.events.Pop()
+ if !ok {
+ return Event{}, io.EOF
+ }
+
+ ev, err := it.convertEvent(oev)
+
+ if err == errSkip {
+ return it.next()
+ } else if err != nil {
+ return Event{}, err
+ }
+
+	// Two events aren't allowed to fall on the same timestamp in the new API,
+	// but this may happen when we produce EvGoStatus events.
+ if ev.base.time <= it.lastTs {
+ ev.base.time = it.lastTs + 1
+ }
+ it.lastTs = ev.base.time
+ return ev, nil
+}
+
+var errSkip = errors.New("skip event")
+
+// convertEvent converts an event from the old trace format to zero or more
+// events in the new format. Most events translate 1 to 1. Some events don't
+// result in an event right away, in which case convertEvent returns errSkip.
+// Some events result in more than one new event; in this case, convertEvent
+// returns the first event and stores additional events in it.extra. When
+// encountering events that oldtrace shouldn't be able to emit, convertEvent
+// returns a descriptive error.
+func (it *oldTraceConverter) convertEvent(ev *oldtrace.Event) (OUT Event, ERR error) {
+ var mappedType event.Type
+ mappedArgs := ev.Args
+
+ switch ev.Type {
+ case oldtrace.EvGomaxprocs:
+ mappedType = go122.EvProcsChange
+ if it.preInit {
+			// The first EvGomaxprocs signals the end of trace initialization.
+			// At this point we've seen all goroutines that already existed
+			// when tracing began.
+ it.preInit = false
+ for gid := range it.createdPreInit {
+				// These are goroutines that already existed when tracing
+				// started but for which we received none of GoWaiting,
+				// GoInSyscall, or GoStart. Such goroutines are in state _Gidle
+				// or _Grunnable.
+ it.extra = append(it.extra, Event{
+ ctx: schedCtx{
+ // G: GoID(gid),
+ G: NoGoroutine,
+ P: NoProc,
+ M: NoThread,
+ },
+ table: it.evt,
+ base: baseEvent{
+ typ: go122.EvGoStatus,
+ time: Time(ev.Ts),
+ args: [4]uint64{uint64(gid), ^uint64(0), uint64(go122.GoRunnable)},
+ },
+ })
+ }
+ it.createdPreInit = nil
+ return Event{}, errSkip
+ }
+ case oldtrace.EvProcStart:
+ it.procMs[ProcID(ev.P)] = ThreadID(ev.Args[0])
+ if _, ok := it.seenProcs[ProcID(ev.P)]; ok {
+ mappedType = go122.EvProcStart
+ mappedArgs = [4]uint64{uint64(ev.P)}
+ } else {
+ it.seenProcs[ProcID(ev.P)] = struct{}{}
+ mappedType = go122.EvProcStatus
+ mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcRunning)}
+ }
+ case oldtrace.EvProcStop:
+ if _, ok := it.seenProcs[ProcID(ev.P)]; ok {
+ mappedType = go122.EvProcStop
+ mappedArgs = [4]uint64{uint64(ev.P)}
+ } else {
+ it.seenProcs[ProcID(ev.P)] = struct{}{}
+ mappedType = go122.EvProcStatus
+ mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcIdle)}
+ }
+ case oldtrace.EvGCStart:
+ mappedType = go122.EvGCBegin
+ case oldtrace.EvGCDone:
+ mappedType = go122.EvGCEnd
+ case oldtrace.EvSTWStart:
+ sid := it.builtinToStringID[sSTWUnknown+it.trace.STWReason(ev.Args[0])]
+ it.lastStwReason = sid
+ mappedType = go122.EvSTWBegin
+ mappedArgs = [4]uint64{uint64(sid)}
+ case oldtrace.EvSTWDone:
+ mappedType = go122.EvSTWEnd
+ mappedArgs = [4]uint64{it.lastStwReason}
+ case oldtrace.EvGCSweepStart:
+ mappedType = go122.EvGCSweepBegin
+ case oldtrace.EvGCSweepDone:
+ mappedType = go122.EvGCSweepEnd
+ case oldtrace.EvGoCreate:
+ if it.preInit {
+ it.createdPreInit[GoID(ev.Args[0])] = struct{}{}
+ return Event{}, errSkip
+ }
+ mappedType = go122.EvGoCreate
+ case oldtrace.EvGoStart:
+ if it.preInit {
+ mappedType = go122.EvGoStatus
+ mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoRunning)}
+ delete(it.createdPreInit, GoID(ev.Args[0]))
+ } else {
+ mappedType = go122.EvGoStart
+ }
+ case oldtrace.EvGoStartLabel:
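+ // In the old format, the label is attached to the start event itself. The
+ // new format uses a separate EvGoLabel event, so return an EvGoStart now
+ // and queue an EvGoLabel with the same context in it.extra, to be emitted
+ // by the next call to next.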
+ it.extra = []Event{{
+ ctx: schedCtx{
+ G: GoID(ev.G),
+ P: ProcID(ev.P),
+ M: it.procMs[ProcID(ev.P)],
+ },
+ table: it.evt,
+ base: baseEvent{
+ typ: go122.EvGoLabel,
+ time: Time(ev.Ts),
+ args: [4]uint64{ev.Args[2]},
+ },
+ }}
+ return Event{
+ ctx: schedCtx{
+ G: GoID(ev.G),
+ P: ProcID(ev.P),
+ M: it.procMs[ProcID(ev.P)],
+ },
+ table: it.evt,
+ base: baseEvent{
+ typ: go122.EvGoStart,
+ time: Time(ev.Ts),
+ args: ev.Args,
+ },
+ }, nil
+ case oldtrace.EvGoEnd:
+ mappedType = go122.EvGoDestroy
+ case oldtrace.EvGoStop:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sForever]), uint64(ev.StkID)}
+ case oldtrace.EvGoSched:
+ mappedType = go122.EvGoStop
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sGosched]), uint64(ev.StkID)}
+ case oldtrace.EvGoPreempt:
+ mappedType = go122.EvGoStop
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sPreempted]), uint64(ev.StkID)}
+ case oldtrace.EvGoSleep:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSleep]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlock:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sEmpty]), uint64(ev.StkID)}
+ case oldtrace.EvGoUnblock:
+ mappedType = go122.EvGoUnblock
+ case oldtrace.EvGoBlockSend:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanSend]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockRecv:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanRecv]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockSelect:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSelect]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockSync:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSync]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockCond:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSyncCond]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockNet:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sNetwork]), uint64(ev.StkID)}
+ case oldtrace.EvGoBlockGC:
+ mappedType = go122.EvGoBlock
+ mappedArgs = [4]uint64{uint64(it.builtinToStringID[sMarkAssistWait]), uint64(ev.StkID)}
+ case oldtrace.EvGoSysCall:
+ // Look for the next event for the same G to determine if the syscall
+ // blocked.
+ blocked := false
+ it.events.All()(func(nev *oldtrace.Event) bool {
+ if nev.G != ev.G {
+ return true
+ }
+ // After an EvGoSysCall, the next event on the same G will either be
+ // EvGoSysBlock to denote a blocking syscall, or some other event
+ // (or the end of the trace) if the syscall didn't block.
+ if nev.Type == oldtrace.EvGoSysBlock {
+ blocked = true
+ }
+ return false
+ })
+ if blocked {
+ mappedType = go122.EvGoSyscallBegin
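+ // Only the stack ID is set; it goes in args[1] to match the argument
+ // layout of go122.EvGoSyscallBegin.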
+ mappedArgs = [4]uint64{1: uint64(ev.StkID)}
+ } else {
+ // Convert the old instantaneous syscall event to a pair of syscall
+ // begin and syscall end and give it the shortest possible duration,
+ // 1ns.
+ out1 := Event{
+ ctx: schedCtx{
+ G: GoID(ev.G),
+ P: ProcID(ev.P),
+ M: it.procMs[ProcID(ev.P)],
+ },
+ table: it.evt,
+ base: baseEvent{
+ typ: go122.EvGoSyscallBegin,
+ time: Time(ev.Ts),
+ args: [4]uint64{1: uint64(ev.StkID)},
+ },
+ }
+
+ out2 := Event{
+ ctx: out1.ctx,
+ table: it.evt,
+ base: baseEvent{
+ typ: go122.EvGoSyscallEnd,
+ time: Time(ev.Ts + 1),
+ args: [4]uint64{},
+ },
+ }
+
+ it.extra = append(it.extra, out2)
+ return out1, nil
+ }
+
+ case oldtrace.EvGoSysExit:
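+ // The old tracer emitted EvGoSysExit only for syscalls that blocked, so
+ // this maps to the blocked variant of syscall end.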
+ mappedType = go122.EvGoSyscallEndBlocked
+ case oldtrace.EvGoSysBlock:
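+ // EvGoSysBlock is only used as a lookahead marker when handling
+ // EvGoSysCall (see above); it produces no event of its own.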
+ return Event{}, errSkip
+ case oldtrace.EvGoWaiting:
+ mappedType = go122.EvGoStatus
+ mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoWaiting)}
+ delete(it.createdPreInit, GoID(ev.Args[0]))
+ case oldtrace.EvGoInSyscall:
+ mappedType = go122.EvGoStatus
+ // In the new tracer, GoStatus with GoSyscall knows what thread the
+ // syscall is on. In the old tracer, EvGoInSyscall doesn't contain that
+ // information and all we can do here is specify NoThread.
+ mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoSyscall)}
+ delete(it.createdPreInit, GoID(ev.Args[0]))
+ case oldtrace.EvHeapAlloc:
+ mappedType = go122.EvHeapAlloc
+ case oldtrace.EvHeapGoal:
+ mappedType = go122.EvHeapGoal
+ case oldtrace.EvGCMarkAssistStart:
+ mappedType = go122.EvGCMarkAssistBegin
+ case oldtrace.EvGCMarkAssistDone:
+ mappedType = go122.EvGCMarkAssistEnd
+ case oldtrace.EvUserTaskCreate:
+ mappedType = go122.EvUserTaskBegin
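+ // The old format uses 0 to mean "no parent task".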
+ parent := ev.Args[1]
+ if parent == 0 {
+ parent = uint64(NoTask)
+ }
+ mappedArgs = [4]uint64{ev.Args[0], parent, ev.Args[2], uint64(ev.StkID)}
+ name, _ := it.evt.strings.get(stringID(ev.Args[2]))
+ it.tasks[TaskID(ev.Args[0])] = taskState{name: name, parentID: TaskID(ev.Args[1])}
+ case oldtrace.EvUserTaskEnd:
+ mappedType = go122.EvUserTaskEnd
+ // Event.Task expects the parent and name to be smuggled in extra args
+ // and as extra strings.
+ ts, ok := it.tasks[TaskID(ev.Args[0])]
+ if ok {
+ delete(it.tasks, TaskID(ev.Args[0]))
+ mappedArgs = [4]uint64{
+ ev.Args[0],
+ ev.Args[1],
+ uint64(ts.parentID),
+ uint64(it.evt.addExtraString(ts.name)),
+ }
+ } else {
+ mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], uint64(NoTask), uint64(it.evt.addExtraString(""))}
+ }
+ case oldtrace.EvUserRegion:
+ switch ev.Args[1] {
+ case 0: // start
+ mappedType = go122.EvUserRegionBegin
+ case 1: // end
+ mappedType = go122.EvUserRegionEnd
+ }
+ mappedArgs = [4]uint64{ev.Args[0], ev.Args[2], uint64(ev.StkID)}
+ case oldtrace.EvUserLog:
+ mappedType = go122.EvUserLog
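+ // The log message was stored as an inline string in the old trace; map it
+ // to its ID in the new string table.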
+ mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], it.inlineToStringID[ev.Args[3]], uint64(ev.StkID)}
+ case oldtrace.EvCPUSample:
+ mappedType = go122.EvCPUSample
+ // When emitted by the Go 1.22 tracer, CPU samples have 5 arguments:
+ // timestamp, M, P, G, stack. However, after they get turned into Event,
+ // they have the arguments stack, M, P, G.
+ //
+ // In Go 1.21, CPU samples did not have Ms.
+ mappedArgs = [4]uint64{uint64(ev.StkID), ^uint64(0), uint64(ev.P), ev.G}
+ default:
+ return Event{}, fmt.Errorf("unexpected event type %v", ev.Type)
+ }
+
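+ // If the old event carried a stack, store its ID in whichever argument
+ // slot the new event type reserves for stacks. The -1 accounts for the
+ // timestamp argument in the spec, which isn't part of args.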
+ if oldtrace.EventDescriptions[ev.Type].Stack {
+ if stackIDs := go122.Specs()[mappedType].StackIDs; len(stackIDs) > 0 {
+ mappedArgs[stackIDs[0]-1] = uint64(ev.StkID)
+ }
+ }
+
+ m := NoThread
+ if ev.P != -1 && ev.Type != oldtrace.EvCPUSample {
+ if t, ok := it.procMs[ProcID(ev.P)]; ok {
+ m = ThreadID(t)
+ }
+ }
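+ // After a ProcStop the P no longer runs on the M; drop the mapping. The
+ // stop event itself is still attributed to the M we just looked up.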
+ if ev.Type == oldtrace.EvProcStop {
+ delete(it.procMs, ProcID(ev.P))
+ }
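+ // The old format uses goroutine ID 0 to mean "no goroutine".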
+ g := GoID(ev.G)
+ if g == 0 {
+ g = NoGoroutine
+ }
+ out := Event{
+ ctx: schedCtx{
+ G: g,
+ P: ProcID(ev.P),
+ M: m,
+ },
+ table: it.evt,
+ base: baseEvent{
+ typ: mappedType,
+ time: Time(ev.Ts),
+ args: mappedArgs,
+ },
+ }
+ return out, nil
+}
+
+// convertOldFormat takes a fully loaded trace in the old trace format and
+// returns an iterator over events in the new format.
+func convertOldFormat(pr oldtrace.Trace) *oldTraceConverter {
+ it := &oldTraceConverter{}
+ it.init(pr)
+ return it
+}