From: Dominik Honnef Date: Sat, 20 Jan 2024 16:37:50 +0000 (+0100) Subject: internal/trace/v2: support old trace format X-Git-Tag: go1.23rc1~900 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=6133c1e4e202af2b2a6d4873d5a28ea3438e5554;p=gostls13.git internal/trace/v2: support old trace format Add support for traces from Go 1.11–1.21 by converting old traces to the Go 1.22 format on the fly. We import Gotraceui's trace parser, which is an optimized parser based on Go 1.19's internal/trace package, and further modify it for the needs of the conversion process. With the optimized parser, loading old traces using the new API is twice as fast and uses less total memory than 'go tool trace' did in older versions. The new parser does not, however, support traces from versions older than 1.11. This commit does not update cmd/trace to use the new API for old traces. Change-Id: If9380aa515e29445ff624274d1760ee945ca4816 Reviewed-on: https://go-review.googlesource.com/c/go/+/557356 Reviewed-by: Michael Knyszek Auto-Submit: Michael Knyszek Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 5d2230e8e1..26e6e8a77d 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -632,7 +632,10 @@ var depsRules = ` FMT, encoding/binary, internal/trace/v2/version < internal/trace/v2/raw; - FMT, encoding/binary, internal/trace/v2/version + FMT, internal/trace/v2/event, internal/trace/v2/version, io, sort, encoding/binary + < internal/trace/v2/internal/oldtrace; + + FMT, encoding/binary, internal/trace/v2/version, internal/trace/v2/internal/oldtrace < internal/trace/v2; regexp, internal/trace/v2, internal/trace/v2/raw, internal/txtar diff --git a/src/internal/trace/v2/event/go122/event.go b/src/internal/trace/v2/event/go122/event.go index be7ce4c806..f28e918231 100644 --- a/src/internal/trace/v2/event/go122/event.go +++ b/src/internal/trace/v2/event/go122/event.go @@ -41,7 +41,7 @@ const ( EvGoSyscallBegin // syscall enter [timestamp, P seq, stack ID] EvGoSyscallEnd // syscall exit [timestamp] EvGoSyscallEndBlocked // syscall exit and it blocked at some point [timestamp] - EvGoStatus // goroutine status at the start of a generation [timestamp, goroutine ID, status] + EvGoStatus // goroutine status at the start of a generation [timestamp, goroutine ID, thread ID, status] // STW. EvSTWBegin // STW start [timestamp, kind] @@ -66,7 +66,7 @@ const ( EvUserTaskEnd // end of a task [timestamp, internal task ID, stack ID] EvUserRegionBegin // trace.{Start,With}Region [timestamp, internal task ID, name string ID, stack ID] EvUserRegionEnd // trace.{End,With}Region [timestamp, internal task ID, name string ID, stack ID] - EvUserLog // trace.Log [timestamp, internal task ID, key string ID, stack, value string ID] + EvUserLog // trace.Log [timestamp, internal task ID, key string ID, value string ID, stack] ) // EventString returns the name of a Go 1.22 event. @@ -108,7 +108,7 @@ var specs = [...]event.Spec{ }, EvCPUSample: event.Spec{ Name: "CPUSample", - Args: []string{"time", "p", "g", "m", "stack"}, + Args: []string{"time", "m", "p", "g", "stack"}, // N.B. There's clearly a timestamp here, but these Events // are special in that they don't appear in the regular // M streams.
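For context on how the new entry point is driven, the tests added below read the trace header first and then hand the remaining stream to the old-trace parser. A minimal sketch of that flow, not part of the patch, assuming a trace file at a hypothetical path "trace.out" (oldtrace and version are internal packages, so this only compiles from within the Go tree):

package main

import (
	"bufio"
	"log"
	"os"

	"internal/trace/v2/internal/oldtrace"
	"internal/trace/v2/version"
)

func main() {
	f, err := os.Open("trace.out") // hypothetical path to a Go 1.11–1.21 trace
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	br := bufio.NewReader(f)
	// ReadHeader consumes the "go 1.xx trace" header and reports the version.
	v, err := version.ReadHeader(br)
	if err != nil {
		log.Fatal(err)
	}
	// Parse reads the rest of the stream and materializes all events in memory.
	res, err := oldtrace.Parse(br, v)
	if err != nil {
		log.Fatal(err)
	}
	// Iterate over the globally ordered events.
	res.Events.All()(func(ev *oldtrace.Event) bool {
		_ = ev // inspect ev.Type, ev.Ts, ev.G, ...
		return true
	})
}
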
diff --git a/src/internal/trace/v2/generation.go b/src/internal/trace/v2/generation.go index da31585266..34ea4a147b 100644 --- a/src/internal/trace/v2/generation.go +++ b/src/internal/trace/v2/generation.go @@ -328,7 +328,7 @@ func addStacks(stackTable *dataTable[stackID, stack], pcs map[uint64]frame, b ba // sample contained therein to the provided samples list. func addCPUSamples(samples []cpuSample, b batch) ([]cpuSample, error) { if !b.isCPUSamplesBatch() { - return nil, fmt.Errorf("internal error: addStrings called on non-string batch") + return nil, fmt.Errorf("internal error: addCPUSamples called on non-CPU-sample batch") } r := bytes.NewReader(b.data) hdr, err := r.ReadByte() // Consume the EvCPUSamples byte. diff --git a/src/internal/trace/v2/internal/oldtrace/order.go b/src/internal/trace/v2/internal/oldtrace/order.go new file mode 100644 index 0000000000..4cd3def211 --- /dev/null +++ b/src/internal/trace/v2/internal/oldtrace/order.go @@ -0,0 +1,186 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package oldtrace + +import "errors" + +type orderEvent struct { + ev Event + proc *proc +} + +type gStatus int + +type gState struct { + seq uint64 + status gStatus +} + +const ( + gDead gStatus = iota + gRunnable + gRunning + gWaiting + + unordered = ^uint64(0) + garbage = ^uint64(0) - 1 + noseq = ^uint64(0) + seqinc = ^uint64(0) - 1 +) + +// stateTransition returns goroutine state (sequence and status) when the event +// becomes ready for merging (init) and the goroutine state after the event (next). +func stateTransition(ev *Event) (g uint64, init, next gState) { + // Note that we have an explicit return in each case, as that produces slightly better code (tested on Go 1.19). + + switch ev.Type { + case EvGoCreate: + g = ev.Args[0] + init = gState{0, gDead} + next = gState{1, gRunnable} + return + case EvGoWaiting, EvGoInSyscall: + g = ev.G + init = gState{1, gRunnable} + next = gState{2, gWaiting} + return + case EvGoStart, EvGoStartLabel: + g = ev.G + init = gState{ev.Args[1], gRunnable} + next = gState{ev.Args[1] + 1, gRunning} + return + case EvGoStartLocal: + // noseq means that this event is ready for merging as soon as + // frontier reaches it (EvGoStartLocal is emitted on the same P + // as the corresponding EvGoCreate/EvGoUnblock, and thus the latter + // is already merged). + // seqinc is a stub for cases when event increments g sequence, + // but since we don't know current seq we also don't know next seq. 
+ g = ev.G + init = gState{noseq, gRunnable} + next = gState{seqinc, gRunning} + return + case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, + EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep, + EvGoSysBlock, EvGoBlockGC: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gWaiting} + return + case EvGoSched, EvGoPreempt: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gRunnable} + return + case EvGoUnblock, EvGoSysExit: + g = ev.Args[0] + init = gState{ev.Args[1], gWaiting} + next = gState{ev.Args[1] + 1, gRunnable} + return + case EvGoUnblockLocal, EvGoSysExitLocal: + g = ev.Args[0] + init = gState{noseq, gWaiting} + next = gState{seqinc, gRunnable} + return + case EvGCStart: + g = garbage + init = gState{ev.Args[0], gDead} + next = gState{ev.Args[0] + 1, gDead} + return + default: + // no ordering requirements + g = unordered + return + } +} + +func transitionReady(g uint64, curr, init gState) bool { + return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status +} + +func transition(gs map[uint64]gState, g uint64, init, next gState) error { + if g == unordered { + return nil + } + curr := gs[g] + if !transitionReady(g, curr, init) { + // See comment near the call to transition, where we're building the frontier, for details on how this could + // possibly happen. + return errors.New("encountered impossible goroutine state transition") + } + switch next.seq { + case noseq: + next.seq = curr.seq + case seqinc: + next.seq = curr.seq + 1 + } + gs[g] = next + return nil +} + +type orderEventList []orderEvent + +func (l *orderEventList) Less(i, j int) bool { + return (*l)[i].ev.Ts < (*l)[j].ev.Ts +} + +type eventList []Event + +func (l *eventList) Len() int { + return len(*l) +} + +func (l *eventList) Less(i, j int) bool { + return (*l)[i].Ts < (*l)[j].Ts +} + +func (l *eventList) Swap(i, j int) { + (*l)[i], (*l)[j] = (*l)[j], (*l)[i] +} + +func (h *orderEventList) Push(x orderEvent) { + *h = append(*h, x) + heapUp(h, len(*h)-1) +} + +func (h *orderEventList) Pop() orderEvent { + n := len(*h) - 1 + (*h)[0], (*h)[n] = (*h)[n], (*h)[0] + heapDown(h, 0, n) + x := (*h)[len(*h)-1] + *h = (*h)[:len(*h)-1] + return x +} + +func heapUp(h *orderEventList, j int) { + for { + i := (j - 1) / 2 // parent + if i == j || !h.Less(j, i) { + break + } + (*h)[i], (*h)[j] = (*h)[j], (*h)[i] + j = i + } +} + +func heapDown(h *orderEventList, i0, n int) bool { + i := i0 + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && h.Less(j2, j1) { + j = j2 // = 2*i + 2 // right child + } + if !h.Less(j, i) { + break + } + (*h)[i], (*h)[j] = (*h)[j], (*h)[i] + i = j + } + return i > i0 +} diff --git a/src/internal/trace/v2/internal/oldtrace/parser.go b/src/internal/trace/v2/internal/oldtrace/parser.go new file mode 100644 index 0000000000..da1079b34b --- /dev/null +++ b/src/internal/trace/v2/internal/oldtrace/parser.go @@ -0,0 +1,1540 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package oldtrace implements a parser for Go execution traces from versions +// 1.11–1.21. +// +// The package started as a copy of Go 1.19's internal/trace, but has been +// optimized to be faster while using less memory and fewer allocations. It has +// been further modified for the specific purpose of converting traces to the +// new 1.22+ format. 
+package oldtrace + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "internal/trace/v2/event" + "internal/trace/v2/version" + "io" + "math" + "sort" +) + +// Timestamp represents a count of nanoseconds since the beginning of the trace. +// They can only be meaningfully compared with other timestamps from the same +// trace. +type Timestamp int64 + +// Event describes one event in the trace. +type Event struct { + // The Event type is carefully laid out to optimize its size and to avoid + // pointers, the latter so that the garbage collector won't have to scan any + // memory of our millions of events. + + Ts Timestamp // timestamp in nanoseconds + G uint64 // G on which the event happened + Args [4]uint64 // event-type-specific arguments + StkID uint32 // unique stack ID + P int32 // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP) + Type event.Type // one of Ev* +} + +// Frame is a frame in stack traces. +type Frame struct { + PC uint64 + // string ID of the function name + Fn uint64 + // string ID of the file name + File uint64 + Line int +} + +const ( + // Special P identifiers: + FakeP = 1000000 + iota + TimerP // contains timer unblocks + NetpollP // contains network unblocks + SyscallP // contains returns from syscalls + GCP // contains GC state + ProfileP // contains recording of CPU profile samples +) + +// Trace is the result of Parse. +type Trace struct { + Version version.Version + + // Events is the sorted list of Events in the trace. + Events Events + // Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs + // from the trace. + Stacks map[uint32][]uint64 + PCs map[uint64]Frame + Strings map[uint64]string + InlineStrings []string +} + +// batchOffset records the byte offset of, and number of events in, a batch. A +// batch is a sequence of events emitted by a P. Events within a single batch +// are sorted by time. +type batchOffset struct { + offset int + numEvents int +} + +type parser struct { + ver version.Version + data []byte + off int + + strings map[uint64]string + inlineStrings []string + inlineStringsMapping map[string]int + // map from Ps to their batch offsets + batchOffsets map[int32][]batchOffset + stacks map[uint32][]uint64 + stacksData []uint64 + ticksPerSec int64 + pcs map[uint64]Frame + cpuSamples []Event + timerGoids map[uint64]bool + + // state for readRawEvent + args []uint64 + + // state for parseEvent + lastTs Timestamp + lastG uint64 + // map from Ps to the last Gs that ran on them + lastGs map[int32]uint64 + lastP int32 +} + +func (p *parser) discard(n uint64) bool { + if n > math.MaxInt { + return false + } + if noff := p.off + int(n); noff < p.off || noff > len(p.data) { + return false + } else { + p.off = noff + } + return true +} + +func newParser(r io.Reader, ver version.Version) (*parser, error) { + var buf []byte + if seeker, ok := r.(io.Seeker); ok { + // Determine the size of the reader so that we can allocate a buffer + // without having to grow it later. 
+ cur, err := seeker.Seek(0, io.SeekCurrent) + if err != nil { + return nil, err + } + end, err := seeker.Seek(0, io.SeekEnd) + if err != nil { + return nil, err + } + _, err = seeker.Seek(cur, io.SeekStart) + if err != nil { + return nil, err + } + + buf = make([]byte, end-cur) + _, err = io.ReadFull(r, buf) + if err != nil { + return nil, err + } + } else { + var err error + buf, err = io.ReadAll(r) + if err != nil { + return nil, err + } + } + return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil +} + +// Parse parses Go execution traces from versions 1.11–1.21. The provided reader +// will be read to completion and the entire trace will be materialized in +// memory. That is, this function does not allow incremental parsing. +// +// The reader has to be positioned just after the trace header and vers needs to +// be the version of the trace. This can be achieved by using +// version.ReadHeader. +func Parse(r io.Reader, vers version.Version) (Trace, error) { + // We accept the version as an argument because internal/trace/v2 will have + // already read the version to determine which parser to use. + p, err := newParser(r, vers) + if err != nil { + return Trace{}, err + } + return p.parse() +} + +// parse parses, post-processes and verifies the trace. +func (p *parser) parse() (Trace, error) { + defer func() { + p.data = nil + }() + + // We parse a trace by running the following steps in order: + // + // 1. In the initial pass we collect information about batches (their + // locations and sizes.) We also parse CPU profiling samples in this + // step, simply to reduce the number of full passes that we need. + // + // 2. In the second pass we parse batches and merge them into a globally + // ordered event stream. This uses the batch information from the first + // pass to quickly find batches. + // + // 3. After all events have been parsed we convert their timestamps from CPU + // ticks to wall time. Furthermore we move timers and syscalls to + // dedicated, fake Ps. + // + // 4. Finally, we validate the trace. + + p.strings = make(map[uint64]string) + p.batchOffsets = make(map[int32][]batchOffset) + p.lastGs = make(map[int32]uint64) + p.stacks = make(map[uint32][]uint64) + p.pcs = make(map[uint64]Frame) + p.inlineStringsMapping = make(map[string]int) + + if err := p.collectBatchesAndCPUSamples(); err != nil { + return Trace{}, err + } + + events, err := p.parseEventBatches() + if err != nil { + return Trace{}, err + } + + if p.ticksPerSec == 0 { + return Trace{}, errors.New("no EvFrequency event") + } + + if events.Len() > 0 { + // Translate cpu ticks to real time. + minTs := events.Ptr(0).Ts + // Use floating point to avoid integer overflows. + freq := 1e9 / float64(p.ticksPerSec) + for i := 0; i < events.Len(); i++ { + ev := events.Ptr(i) + ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq) + // Move timers and syscalls to separate fake Ps. + if p.timerGoids[ev.G] && ev.Type == EvGoUnblock { + ev.P = TimerP + } + if ev.Type == EvGoSysExit { + ev.P = SyscallP + } + } + } + + if err := p.postProcessTrace(events); err != nil { + return Trace{}, err + } + + res := Trace{ + Version: p.ver, + Events: events, + Stacks: p.stacks, + Strings: p.strings, + InlineStrings: p.inlineStrings, + PCs: p.pcs, + } + return res, nil +} + +// rawEvent is a helper type used during parsing. +type rawEvent struct { + typ event.Type + args []uint64 + sargs []string + + // if typ == EvBatch, these fields describe the batch. 
+ batchPid int32 + batchOffset int +} + +type proc struct { + pid int32 + // the remaining events in the current batch + events []Event + // buffer for reading batches into, aliased by proc.events + buf []Event + + // there are no more batches left + done bool +} + +const eventsBucketSize = 524288 // 32 MiB of events + +type Events struct { + // Events is a slice of slices that grows one slice of size eventsBucketSize + // at a time. This avoids the O(n) cost of slice growth in append, and + // additionally allows consumers to drop references to parts of the data, + // freeing memory piecewise. + n int + buckets []*[eventsBucketSize]Event + off int +} + +// grow grows the slice by one and returns a pointer to the new element, without +// overwriting it. +func (l *Events) grow() *Event { + a, b := l.index(l.n) + if a >= len(l.buckets) { + l.buckets = append(l.buckets, new([eventsBucketSize]Event)) + } + ptr := &l.buckets[a][b] + l.n++ + return ptr +} + +// append appends v to the slice and returns a pointer to the new element. +func (l *Events) append(v Event) *Event { + ptr := l.grow() + *ptr = v + return ptr +} + +func (l *Events) Ptr(i int) *Event { + a, b := l.index(i + l.off) + return &l.buckets[a][b] +} + +func (l *Events) index(i int) (int, int) { + // Doing the division on uint instead of int compiles this function to a + // shift and an AND (for power of 2 bucket sizes), versus a whole bunch of + // instructions for int. + return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize) +} + +func (l *Events) Len() int { + return l.n - l.off +} + +func (l *Events) Less(i, j int) bool { + return l.Ptr(i).Ts < l.Ptr(j).Ts +} + +func (l *Events) Swap(i, j int) { + *l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i) +} + +func (l *Events) Pop() (*Event, bool) { + if l.off == l.n { + return nil, false + } + a, b := l.index(l.off) + ptr := &l.buckets[a][b] + l.off++ + if b == eventsBucketSize-1 || l.off == l.n { + // We've consumed the last event from the bucket, so drop the bucket and + // allow GC to collect it. + l.buckets[a] = nil + } + return ptr, true +} + +func (l *Events) All() func(yield func(ev *Event) bool) { + return func(yield func(ev *Event) bool) { + for i := 0; i < l.Len(); i++ { + a, b := l.index(i + l.off) + ptr := &l.buckets[a][b] + if !yield(ptr) { + return + } + } + } +} + +// parseEventBatches reads per-P event batches and merges them into a single, consistent +// stream. The high level idea is as follows. Events within an individual batch +// are in correct order, because they are emitted by a single P. So we need to +// produce a correct interleaving of the batches. To do this we take first +// unmerged event from each batch (frontier). Then choose subset that is "ready" +// to be merged, that is, events for which all dependencies are already merged. +// Then we choose event with the lowest timestamp from the subset, merge it and +// repeat. This approach ensures that we form a consistent stream even if +// timestamps are incorrect (condition observed on some machines). +func (p *parser) parseEventBatches() (Events, error) { + // The ordering of CPU profile sample events in the data stream is based on + // when each run of the signal handler was able to acquire the spinlock, + // with original timestamps corresponding to when ReadTrace pulled the data + // off of the profBuf queue. Re-sort them by the timestamp we captured + // inside the signal handler. 
sort.Sort((*eventList)(&p.cpuSamples)) + + allProcs := make([]proc, 0, len(p.batchOffsets)) + for pid := range p.batchOffsets { + allProcs = append(allProcs, proc{pid: pid}) + } + allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples}) + + events := Events{} + + // Merge events as long as at least one P has more events + gs := make(map[uint64]gState) + // Note: technically we don't need a priority queue here. We're only ever + // interested in the earliest eligible event, which means we just have to + // track the smallest element. However, in practice, the priority queue + // performs better, because for each event we only have to compute its state + // transition once, not on each iteration. If it was eligible before, it'll + // already be in the queue. Furthermore, on average, we only have one P to + // look at in each iteration, because all other Ps are already in the queue. + var frontier orderEventList + + availableProcs := make([]*proc, len(allProcs)) + for i := range allProcs { + availableProcs[i] = &allProcs[i] + } + for { + pidLoop: + for i := 0; i < len(availableProcs); i++ { + proc := availableProcs[i] + + for len(proc.events) == 0 { + // Call loadBatch in a loop because sometimes batches are empty + evs, err := p.loadBatch(proc.pid, proc.buf[:0]) + proc.buf = evs[:0] + if err == io.EOF { + // This P has no more events + proc.done = true + availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i] + availableProcs = availableProcs[:len(availableProcs)-1] + // We swapped the element at i with another proc, so look at + // the index again + i-- + continue pidLoop + } else if err != nil { + return Events{}, err + } else { + proc.events = evs + } + } + + ev := &proc.events[0] + g, init, _ := stateTransition(ev) + + // TODO(dh): This implementation matches the behavior of the + // upstream 'go tool trace', and works in practice, but has run into + // the following inconsistency during fuzzing: what happens if + // multiple Ps have events for the same G? While building the + // frontier we will check all of the events against the current + // state of the G. However, when we process the frontier, the state + // of the G changes, and a transition that was valid while building + // the frontier may no longer be valid when processing the frontier. + // Is this something that can happen for real, valid traces, or is + // this only possible with corrupt data? + if !transitionReady(g, gs[g], init) { + continue + } + proc.events = proc.events[1:] + availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i] + availableProcs = availableProcs[:len(availableProcs)-1] + frontier.Push(orderEvent{*ev, proc}) + + // We swapped the element at i with another proc, so look at the + // index again + i-- + } + + if len(frontier) == 0 { + for i := range allProcs { + if !allProcs[i].done { + return Events{}, fmt.Errorf("no consistent ordering of events possible") + } + } + break + } + f := frontier.Pop() + + // We're computing the state transition twice, once when computing the + // frontier, and now to apply the transition. This is fine because + // stateTransition is a pure function. Computing it again is cheaper + // than storing large items in the frontier. + g, init, next := stateTransition(&f.ev) + + // Get rid of "Local" events, they are intended merely for ordering.
+ switch f.ev.Type { + case EvGoStartLocal: + f.ev.Type = EvGoStart + case EvGoUnblockLocal: + f.ev.Type = EvGoUnblock + case EvGoSysExitLocal: + f.ev.Type = EvGoSysExit + } + events.append(f.ev) + + if err := transition(gs, g, init, next); err != nil { + return Events{}, err + } + availableProcs = append(availableProcs, f.proc) + } + + // At this point we have a consistent stream of events. Make sure time + // stamps respect the ordering. The tests will skip (not fail) the test case + // if they see this error. + if !sort.IsSorted(&events) { + return Events{}, ErrTimeOrder + } + + // The last part is giving correct timestamps to EvGoSysExit events. The + // problem with EvGoSysExit is that actual syscall exit timestamp + // (ev.Args[2]) is potentially acquired long before event emission. So far + // we've used timestamp of event emission (ev.Ts). We could not set ev.Ts = + // ev.Args[2] earlier, because it would produce seemingly broken timestamps + // (misplaced event). We also can't simply update the timestamp and resort + // events, because if timestamps are broken we will misplace the event and + // later report logically broken trace (instead of reporting broken + // timestamps). + lastSysBlock := make(map[uint64]Timestamp) + for i := 0; i < events.Len(); i++ { + ev := events.Ptr(i) + switch ev.Type { + case EvGoSysBlock, EvGoInSyscall: + lastSysBlock[ev.G] = ev.Ts + case EvGoSysExit: + ts := Timestamp(ev.Args[2]) + if ts == 0 { + continue + } + block := lastSysBlock[ev.G] + if block == 0 { + return Events{}, fmt.Errorf("stray syscall exit") + } + if ts < block { + return Events{}, ErrTimeOrder + } + ev.Ts = ts + } + } + sort.Stable(&events) + + return events, nil +} + +// collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples. +func (p *parser) collectBatchesAndCPUSamples() error { + // Read events. + var raw rawEvent + var curP int32 + for n := uint64(0); ; n++ { + err := p.readRawEvent(skipArgs|skipStrings, &raw) + if err == io.EOF { + break + } + if err != nil { + return err + } + if raw.typ == EvNone { + continue + } + + if raw.typ == EvBatch { + bo := batchOffset{offset: raw.batchOffset} + p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo) + curP = raw.batchPid + } + + batches := p.batchOffsets[curP] + if len(batches) == 0 { + return fmt.Errorf("read event %d with current P of %d, but P has no batches yet", + raw.typ, curP) + } + batches[len(batches)-1].numEvents++ + + if raw.typ == EvCPUSample { + e := Event{Type: raw.typ} + + argOffset := 1 + narg := raw.argNum() + if len(raw.args) != narg { + return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args)) + } + for i := argOffset; i < narg; i++ { + if i == narg-1 { + e.StkID = uint32(raw.args[i]) + } else { + e.Args[i-argOffset] = raw.args[i] + } + } + + e.Ts = Timestamp(e.Args[0]) + e.P = int32(e.Args[1]) + e.G = e.Args[2] + e.Args[0] = 0 + + // Most events are written out by the active P at the exact moment + // they describe. CPU profile samples are different because they're + // written to the tracing log after some delay, by a separate worker + // goroutine, into a separate buffer. + // + // We keep these in their own batch until all of the batches are + // merged in timestamp order. We also (right before the merge) + // re-sort these events by the timestamp captured in the profiling + // signal handler. + // + // Note that we're not concerned about the memory usage of storing + // all CPU samples during the indexing phase. 
There are orders of + // magnitude fewer CPU samples than runtime events. + p.cpuSamples = append(p.cpuSamples, e) + } + } + + return nil +} + +const ( + skipArgs = 1 << iota + skipStrings +) + +func (p *parser) readByte() (byte, bool) { + if p.off < len(p.data) && p.off >= 0 { + b := p.data[p.off] + p.off++ + return b, true + } else { + return 0, false + } +} + +func (p *parser) readFull(n int) ([]byte, error) { + if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) { + // p.off < 0 is impossible but makes BCE happy. + // + // We do fail outright if there's not enough data, we don't care about + // partial results. + return nil, io.ErrUnexpectedEOF + } + buf := p.data[p.off : p.off+n] + p.off += n + return buf, nil +} + +// readRawEvent reads a raw event into ev. The slices in ev are only valid until +// the next call to readRawEvent, even when storing to a different location. +func (p *parser) readRawEvent(flags uint, ev *rawEvent) error { + // The number of arguments is encoded using two bits and can thus only + // represent the values 0–3. The value 3 (on the wire) indicates that + // arguments are prefixed by their byte length, to encode >=3 arguments. + const inlineArgs = 3 + + // Read event type and number of arguments (1 byte). + b, ok := p.readByte() + if !ok { + return io.EOF + } + typ := event.Type(b << 2 >> 2) + // Most events have a timestamp before the actual arguments, so we add 1 and + // parse it like it's the first argument. EvString has a special format and + // the number of arguments doesn't matter. EvBatch writes '1' as the number + // of arguments, but actually has two: a pid and a timestamp, but here the + // timestamp is the second argument, not the first; adding 1 happens to come + // up with the correct number, but it doesn't matter, because EvBatch has + // custom logic for parsing. + // + // Note that because we're adding 1, inlineArgs == 3 describes the largest + // number of logical arguments that isn't length-prefixed, even though the + // value 3 on the wire indicates length-prefixing. For us, that becomes narg + // == 4. + narg := b>>6 + 1 + if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver { + return fmt.Errorf("unknown event type %d", typ) + } + + switch typ { + case EvString: + if flags&skipStrings != 0 { + // String dictionary entry [ID, length, string]. + if _, err := p.readVal(); err != nil { + return errMalformedVarint + } + ln, err := p.readVal() + if err != nil { + return err + } + if !p.discard(ln) { + return fmt.Errorf("failed to read trace: %w", io.EOF) + } + } else { + // String dictionary entry [ID, length, string]. 
+ id, err := p.readVal() + if err != nil { + return err + } + if id == 0 { + return errors.New("string has invalid id 0") + } + if p.strings[id] != "" { + return fmt.Errorf("string has duplicate id %d", id) + } + var ln uint64 + ln, err = p.readVal() + if err != nil { + return err + } + if ln == 0 { + return errors.New("string has invalid length 0") + } + if ln > 1e6 { + return fmt.Errorf("string has too large length %d", ln) + } + buf, err := p.readFull(int(ln)) + if err != nil { + return fmt.Errorf("failed to read trace: %w", err) + } + p.strings[id] = string(buf) + } + + ev.typ = EvNone + return nil + case EvBatch: + if want := byte(2); narg != want { + return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want) + } + + // -1 because we've already read the first byte of the batch + off := p.off - 1 + + pid, err := p.readVal() + if err != nil { + return err + } + if pid != math.MaxUint64 && pid > math.MaxInt32 { + return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxUint)) + } + + var pid32 int32 + if pid == math.MaxUint64 { + pid32 = -1 + } else { + pid32 = int32(pid) + } + + v, err := p.readVal() + if err != nil { + return err + } + + *ev = rawEvent{ + typ: EvBatch, + args: p.args[:0], + batchPid: pid32, + batchOffset: off, + } + ev.args = append(ev.args, pid, v) + return nil + default: + *ev = rawEvent{typ: typ, args: p.args[:0]} + if narg <= inlineArgs { + if flags&skipArgs == 0 { + for i := 0; i < int(narg); i++ { + v, err := p.readVal() + if err != nil { + return fmt.Errorf("failed to read event %d argument: %w", typ, err) + } + ev.args = append(ev.args, v) + } + } else { + for i := 0; i < int(narg); i++ { + if _, err := p.readVal(); err != nil { + return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint) + } + } + } + } else { + // More than inlineArgs args, the first value is length of the event + // in bytes. + v, err := p.readVal() + if err != nil { + return fmt.Errorf("failed to read event %d argument: %w", typ, err) + } + + if limit := uint64(2048); v > limit { + // At the time of Go 1.19, v seems to be at most 128. Set 2048 + // as a generous upper limit and guard against malformed traces. + return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit) + } + + if flags&skipArgs == 0 || typ == EvCPUSample { + buf, err := p.readFull(int(v)) + if err != nil { + return fmt.Errorf("failed to read trace: %w", err) + } + for len(buf) > 0 { + var v uint64 + v, buf, err = readValFrom(buf) + if err != nil { + return err + } + ev.args = append(ev.args, v) + } + } else { + // Skip over arguments + if !p.discard(v) { + return fmt.Errorf("failed to read trace: %w", io.EOF) + } + } + if typ == EvUserLog { + // EvUserLog records are followed by a value string + if flags&skipArgs == 0 { + // Read string + s, err := p.readStr() + if err != nil { + return err + } + ev.sargs = append(ev.sargs, s) + } else { + // Skip string + v, err := p.readVal() + if err != nil { + return err + } + if !p.discard(v) { + return io.EOF + } + } + } + } + + p.args = ev.args[:0] + return nil + } +} + +// loadBatch loads the next batch for pid and appends its contents to to events. 
+func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) { + offsets := p.batchOffsets[pid] + if len(offsets) == 0 { + return nil, io.EOF + } + n := offsets[0].numEvents + offset := offsets[0].offset + offsets = offsets[1:] + p.batchOffsets[pid] = offsets + + p.off = offset + + if cap(events) < n { + events = make([]Event, 0, n) + } + + gotHeader := false + var raw rawEvent + var ev Event + for { + err := p.readRawEvent(0, &raw) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if raw.typ == EvNone || raw.typ == EvCPUSample { + continue + } + if raw.typ == EvBatch { + if gotHeader { + break + } else { + gotHeader = true + } + } + + err = p.parseEvent(&raw, &ev) + if err != nil { + return nil, err + } + if ev.Type != EvNone { + events = append(events, ev) + } + } + + return events, nil +} + +func (p *parser) readStr() (s string, err error) { + sz, err := p.readVal() + if err != nil { + return "", err + } + if sz == 0 { + return "", nil + } + if sz > 1e6 { + return "", fmt.Errorf("string is too large (len=%d)", sz) + } + buf, err := p.readFull(int(sz)) + if err != nil { + return "", fmt.Errorf("failed to read trace: %w", err) + } + return string(buf), nil +} + +// parseEvent transforms raw events into events. +// It does analyze and verify per-event-type arguments. +func (p *parser) parseEvent(raw *rawEvent, ev *Event) error { + desc := &EventDescriptions[raw.typ] + if desc.Name == "" { + return fmt.Errorf("missing description for event type %d", raw.typ) + } + narg := raw.argNum() + if len(raw.args) != narg { + return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args)) + } + switch raw.typ { + case EvBatch: + p.lastGs[p.lastP] = p.lastG + if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 { + return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32)) + } + if raw.args[0] == math.MaxUint64 { + p.lastP = -1 + } else { + p.lastP = int32(raw.args[0]) + } + p.lastG = p.lastGs[p.lastP] + p.lastTs = Timestamp(raw.args[1]) + case EvFrequency: + p.ticksPerSec = int64(raw.args[0]) + if p.ticksPerSec <= 0 { + // The most likely cause for this is tick skew on different CPUs. + // For example, solaris/amd64 seems to have wildly different + // ticks on different CPUs. + return ErrTimeOrder + } + case EvTimerGoroutine: + p.timerGoids[raw.args[0]] = true + case EvStack: + if len(raw.args) < 2 { + return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args)) + } + size := raw.args[1] + if size > 1000 { + return fmt.Errorf("EvStack has bad number of frames: %d", size) + } + want := 2 + 4*size + if uint64(len(raw.args)) != want { + return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args)) + } + id := uint32(raw.args[0]) + if id != 0 && size > 0 { + stk := p.allocateStack(size) + for i := 0; i < int(size); i++ { + pc := raw.args[2+i*4+0] + fn := raw.args[2+i*4+1] + file := raw.args[2+i*4+2] + line := raw.args[2+i*4+3] + stk[i] = pc + + if _, ok := p.pcs[pc]; !ok { + p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)} + } + } + p.stacks[id] = stk + } + case EvCPUSample: + // These events get parsed during the indexing step and don't strictly + // belong to the batch. 
+ default: + *ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG} + var argOffset int + ev.Ts = p.lastTs + Timestamp(raw.args[0]) + argOffset = 1 + p.lastTs = ev.Ts + for i := argOffset; i < narg; i++ { + if i == narg-1 && desc.Stack { + ev.StkID = uint32(raw.args[i]) + } else { + ev.Args[i-argOffset] = raw.args[i] + } + } + switch raw.typ { + case EvGoStart, EvGoStartLocal, EvGoStartLabel: + p.lastG = ev.Args[0] + ev.G = p.lastG + case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt, + EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, + EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, + EvGoSysBlock, EvGoBlockGC: + p.lastG = 0 + case EvGoSysExit, EvGoWaiting, EvGoInSyscall: + ev.G = ev.Args[0] + case EvUserTaskCreate: + // e.Args 0: taskID, 1:parentID, 2:nameID + case EvUserRegion: + // e.Args 0: taskID, 1: mode, 2:nameID + case EvUserLog: + // e.Args 0: taskID, 1:keyID, 2: stackID, 3: messageID + // raw.sargs 0: message + + if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok { + ev.Args[3] = uint64(id) + } else { + id := len(p.inlineStrings) + p.inlineStringsMapping[raw.sargs[0]] = id + p.inlineStrings = append(p.inlineStrings, raw.sargs[0]) + ev.Args[3] = uint64(id) + } + } + + return nil + } + + ev.Type = EvNone + return nil +} + +// ErrTimeOrder is returned by Parse when the trace contains +// time stamps that do not respect actual event ordering. +var ErrTimeOrder = errors.New("time stamps out of order") + +// postProcessTrace does inter-event verification and information restoration. +// The resulting trace is guaranteed to be consistent +// (for example, a P does not run two Gs at the same time, or a G is indeed +// blocked before an unblock event). +func (p *parser) postProcessTrace(events Events) error { + const ( + gDead = iota + gRunnable + gRunning + gWaiting + ) + type gdesc struct { + state int + ev *Event + evStart *Event + evCreate *Event + evMarkAssist *Event + } + type pdesc struct { + running bool + g uint64 + evSweep *Event + } + + gs := make(map[uint64]gdesc) + ps := make(map[int32]pdesc) + tasks := make(map[uint64]*Event) // task id to task creation events + activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions + gs[0] = gdesc{state: gRunning} + var evGC, evSTW *Event + + checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error { + name := EventDescriptions[ev.Type].Name + if g.state != gRunning { + return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts) + } + if p.g != ev.G { + return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts) + } + if !allowG0 && ev.G == 0 { + return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts) + } + return nil + } + + for evIdx := 0; evIdx < events.Len(); evIdx++ { + ev := events.Ptr(evIdx) + + switch ev.Type { + case EvProcStart: + p := ps[ev.P] + if p.running { + return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts) + } + p.running = true + + ps[ev.P] = p + case EvProcStop: + p := ps[ev.P] + if !p.running { + return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts) + } + if p.g != 0 { + return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts) + } + p.running = false + + ps[ev.P] = p + case EvGCStart: + if evGC != nil { + return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts) + } + evGC = ev + // Attribute this to the global GC state. 
+ ev.P = GCP + case EvGCDone: + if evGC == nil { + return fmt.Errorf("bogus GC end (time %d)", ev.Ts) + } + evGC = nil + case EvSTWStart: + evp := &evSTW + if *evp != nil { + return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts) + } + *evp = ev + case EvSTWDone: + evp := &evSTW + if *evp == nil { + return fmt.Errorf("bogus STW end (time %d)", ev.Ts) + } + *evp = nil + case EvGCSweepStart: + p := ps[ev.P] + if p.evSweep != nil { + return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts) + } + p.evSweep = ev + + ps[ev.P] = p + case EvGCMarkAssistStart: + g := gs[ev.G] + if g.evMarkAssist != nil { + return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts) + } + g.evMarkAssist = ev + + gs[ev.G] = g + case EvGCMarkAssistDone: + // Unlike most events, mark assists can be in progress when a + // goroutine starts tracing, so we can't report an error here. + g := gs[ev.G] + if g.evMarkAssist != nil { + g.evMarkAssist = nil + } + + gs[ev.G] = g + case EvGCSweepDone: + p := ps[ev.P] + if p.evSweep == nil { + return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts) + } + p.evSweep = nil + + ps[ev.P] = p + case EvGoWaiting: + g := gs[ev.G] + if g.state != gRunnable { + return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts) + } + g.state = gWaiting + g.ev = ev + + gs[ev.G] = g + case EvGoInSyscall: + g := gs[ev.G] + if g.state != gRunnable { + return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts) + } + g.state = gWaiting + g.ev = ev + + gs[ev.G] = g + case EvGoCreate: + g := gs[ev.G] + p := ps[ev.P] + if err := checkRunning(p, g, ev, true); err != nil { + return err + } + if _, ok := gs[ev.Args[0]]; ok { + return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts) + } + gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev} + + case EvGoStart, EvGoStartLabel: + g := gs[ev.G] + p := ps[ev.P] + if g.state != gRunnable { + return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts) + } + if p.g != 0 { + return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts) + } + g.state = gRunning + g.evStart = ev + p.g = ev.G + if g.evCreate != nil { + ev.StkID = uint32(g.evCreate.Args[1]) + g.evCreate = nil + } + + if g.ev != nil { + g.ev = nil + } + + gs[ev.G] = g + ps[ev.P] = p + case EvGoEnd, EvGoStop: + g := gs[ev.G] + p := ps[ev.P] + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.evStart = nil + g.state = gDead + p.g = 0 + + if ev.Type == EvGoEnd { // flush all active regions + delete(activeRegions, ev.G) + } + + gs[ev.G] = g + ps[ev.P] = p + case EvGoSched, EvGoPreempt: + g := gs[ev.G] + p := ps[ev.P] + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gRunnable + g.evStart = nil + p.g = 0 + g.ev = ev + + gs[ev.G] = g + ps[ev.P] = p + case EvGoUnblock: + g := gs[ev.G] + p := ps[ev.P] + if g.state != gRunning { + return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts) + } + if ev.P != TimerP && p.g != ev.G { + return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts) + } + g1 := gs[ev.Args[0]] + if g1.state != gWaiting { + return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts) + } + if g1.ev != nil && g1.ev.Type == EvGoBlockNet { + ev.P = NetpollP + } + g1.state = gRunnable + g1.ev = ev + gs[ev.Args[0]] = g1 + + case EvGoSysCall: + g := gs[ev.G] + p 
:= ps[ev.P] + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.ev = ev + + gs[ev.G] = g + case EvGoSysBlock: + g := gs[ev.G] + p := ps[ev.P] + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gWaiting + g.evStart = nil + p.g = 0 + + gs[ev.G] = g + ps[ev.P] = p + case EvGoSysExit: + g := gs[ev.G] + if g.state != gWaiting { + return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts) + } + g.state = gRunnable + g.ev = ev + + gs[ev.G] = g + case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, + EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC: + g := gs[ev.G] + p := ps[ev.P] + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gWaiting + g.ev = ev + g.evStart = nil + p.g = 0 + + gs[ev.G] = g + ps[ev.P] = p + case EvUserTaskCreate: + taskid := ev.Args[0] + if prevEv, ok := tasks[taskid]; ok { + return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv) + } + tasks[ev.Args[0]] = ev + + case EvUserTaskEnd: + taskid := ev.Args[0] + delete(tasks, taskid) + + case EvUserRegion: + mode := ev.Args[1] + regions := activeRegions[ev.G] + if mode == 0 { // region start + activeRegions[ev.G] = append(regions, ev) // push + } else if mode == 1 { // region end + n := len(regions) + if n > 0 { // matching region start event is in the trace. + s := regions[n-1] + if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch + return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s) + } + + if n > 1 { + activeRegions[ev.G] = regions[:n-1] + } else { + delete(activeRegions, ev.G) + } + } + } else { + return fmt.Errorf("invalid user region mode: %q", ev) + } + } + + if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 { + // Make sure events don't refer to stacks that don't exist or to + // stacks with zero frames. Neither of these should be possible, but + // better be safe than sorry. + + ev.StkID = 0 + } + + } + + // TODO(mknyszek): restore stacks for EvGoStart events. + return nil +} + +var errMalformedVarint = errors.New("malformatted base-128 varint") + +// readVal reads unsigned base-128 value from r. +func (p *parser) readVal() (uint64, error) { + v, n := binary.Uvarint(p.data[p.off:]) + if n <= 0 { + return 0, errMalformedVarint + } + p.off += n + return v, nil +} + +func readValFrom(buf []byte) (v uint64, rem []byte, err error) { + v, n := binary.Uvarint(buf) + if n <= 0 { + return 0, nil, errMalformedVarint + } + return v, buf[n:], nil +} + +func (ev *Event) String() string { + desc := &EventDescriptions[ev.Type] + w := new(bytes.Buffer) + fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID) + for i, a := range desc.Args { + fmt.Fprintf(w, " %s=%d", a, ev.Args[i]) + } + return w.String() +} + +// argNum returns total number of args for the event accounting for timestamps, +// sequence numbers and differences between trace format versions. +func (raw *rawEvent) argNum() int { + desc := &EventDescriptions[raw.typ] + if raw.typ == EvStack { + return len(raw.args) + } + narg := len(desc.Args) + if desc.Stack { + narg++ + } + switch raw.typ { + case EvBatch, EvFrequency, EvTimerGoroutine: + return narg + } + narg++ // timestamp + return narg +} + +// Event types in the trace. +// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed. 
+const ( + EvNone event.Type = 0 // unused + EvBatch event.Type = 1 // start of per-P batch of events [pid, timestamp] + EvFrequency event.Type = 2 // contains tracer timer frequency [frequency (ticks per second)] + EvStack event.Type = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}] + EvGomaxprocs event.Type = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] + EvProcStart event.Type = 5 // start of P [timestamp, thread id] + EvProcStop event.Type = 6 // stop of P [timestamp] + EvGCStart event.Type = 7 // GC start [timestamp, seq, stack id] + EvGCDone event.Type = 8 // GC done [timestamp] + EvSTWStart event.Type = 9 // GC mark termination start [timestamp, kind] + EvSTWDone event.Type = 10 // GC mark termination done [timestamp] + EvGCSweepStart event.Type = 11 // GC sweep start [timestamp, stack id] + EvGCSweepDone event.Type = 12 // GC sweep done [timestamp, swept, reclaimed] + EvGoCreate event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] + EvGoStart event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq] + EvGoEnd event.Type = 15 // goroutine ends [timestamp] + EvGoStop event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack] + EvGoSched event.Type = 17 // goroutine calls Gosched [timestamp, stack] + EvGoPreempt event.Type = 18 // goroutine is preempted [timestamp, stack] + EvGoSleep event.Type = 19 // goroutine calls Sleep [timestamp, stack] + EvGoBlock event.Type = 20 // goroutine blocks [timestamp, stack] + EvGoUnblock event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] + EvGoBlockSend event.Type = 22 // goroutine blocks on chan send [timestamp, stack] + EvGoBlockRecv event.Type = 23 // goroutine blocks on chan recv [timestamp, stack] + EvGoBlockSelect event.Type = 24 // goroutine blocks on select [timestamp, stack] + EvGoBlockSync event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack] + EvGoBlockCond event.Type = 26 // goroutine blocks on Cond [timestamp, stack] + EvGoBlockNet event.Type = 27 // goroutine blocks on network [timestamp, stack] + EvGoSysCall event.Type = 28 // syscall enter [timestamp, stack] + EvGoSysExit event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] + EvGoSysBlock event.Type = 30 // syscall blocks [timestamp] + EvGoWaiting event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] + EvGoInSyscall event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] + EvHeapAlloc event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes] + EvHeapGoal event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes] + EvTimerGoroutine event.Type = 35 // denotes timer goroutine [timer goroutine id] + EvFutileWakeup event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] + EvString event.Type = 37 // string dictionary entry [ID, length, string] + EvGoStartLocal event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] + EvGoUnblockLocal event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] + EvGoSysExitLocal event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] + EvGoStartLabel event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] 
+ EvGoBlockGC event.Type = 42 // goroutine blocks on GC assist [timestamp, stack] + EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack] + EvGCMarkAssistDone event.Type = 44 // GC mark assist done [timestamp] + EvUserTaskCreate event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string] + EvUserTaskEnd event.Type = 46 // end of task [timestamp, internal task id, stack] + EvUserRegion event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string] + EvUserLog event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string] + EvCPUSample event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id] + EvCount event.Type = 50 +) + +var EventDescriptions = [256]struct { + Name string + minVersion version.Version + Stack bool + Args []string + SArgs []string // string arguments +}{ + EvNone: {"None", 5, false, []string{}, nil}, + EvBatch: {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"} + EvFrequency: {"Frequency", 5, false, []string{"freq"}, nil}, // in 1.5 format it was {"freq", "unused"} + EvStack: {"Stack", 5, false, []string{"id", "siz"}, nil}, + EvGomaxprocs: {"Gomaxprocs", 5, true, []string{"procs"}, nil}, + EvProcStart: {"ProcStart", 5, false, []string{"thread"}, nil}, + EvProcStop: {"ProcStop", 5, false, []string{}, nil}, + EvGCStart: {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {} + EvGCDone: {"GCDone", 5, false, []string{}, nil}, + EvSTWStart: {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0}) + EvSTWDone: {"GCSTWDone", 5, false, []string{}, nil}, + EvGCSweepStart: {"GCSweepStart", 5, true, []string{}, nil}, + EvGCSweepDone: {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {} + EvGoCreate: {"GoCreate", 5, true, []string{"g", "stack"}, nil}, + EvGoStart: {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} + EvGoEnd: {"GoEnd", 5, false, []string{}, nil}, + EvGoStop: {"GoStop", 5, true, []string{}, nil}, + EvGoSched: {"GoSched", 5, true, []string{}, nil}, + EvGoPreempt: {"GoPreempt", 5, true, []string{}, nil}, + EvGoSleep: {"GoSleep", 5, true, []string{}, nil}, + EvGoBlock: {"GoBlock", 5, true, []string{}, nil}, + EvGoUnblock: {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} + EvGoBlockSend: {"GoBlockSend", 5, true, []string{}, nil}, + EvGoBlockRecv: {"GoBlockRecv", 5, true, []string{}, nil}, + EvGoBlockSelect: {"GoBlockSelect", 5, true, []string{}, nil}, + EvGoBlockSync: {"GoBlockSync", 5, true, []string{}, nil}, + EvGoBlockCond: {"GoBlockCond", 5, true, []string{}, nil}, + EvGoBlockNet: {"GoBlockNet", 5, true, []string{}, nil}, + EvGoSysCall: {"GoSysCall", 5, true, []string{}, nil}, + EvGoSysExit: {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil}, + EvGoSysBlock: {"GoSysBlock", 5, false, []string{}, nil}, + EvGoWaiting: {"GoWaiting", 5, false, []string{"g"}, nil}, + EvGoInSyscall: {"GoInSyscall", 5, false, []string{"g"}, nil}, + EvHeapAlloc: {"HeapAlloc", 5, false, []string{"mem"}, nil}, + EvHeapGoal: {"HeapGoal", 5, false, []string{"mem"}, nil}, + EvTimerGoroutine: {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"} + EvFutileWakeup: {"FutileWakeup", 5, false, []string{}, nil}, + EvString: {"String", 7, false, []string{}, nil}, + 
EvGoStartLocal: {"GoStartLocal", 7, false, []string{"g"}, nil}, + EvGoUnblockLocal: {"GoUnblockLocal", 7, true, []string{"g"}, nil}, + EvGoSysExitLocal: {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil}, + EvGoStartLabel: {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}}, + EvGoBlockGC: {"GoBlockGC", 8, true, []string{}, nil}, + EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil}, + EvGCMarkAssistDone: {"GCMarkAssistDone", 9, false, []string{}, nil}, + EvUserTaskCreate: {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}}, + EvUserTaskEnd: {"UserTaskEnd", 11, true, []string{"taskid"}, nil}, + EvUserRegion: {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}}, + EvUserLog: {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}}, + EvCPUSample: {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil}, +} + +//gcassert:inline +func (p *parser) allocateStack(size uint64) []uint64 { + if size == 0 { + return nil + } + + // Stacks are plentiful but small. For our "Staticcheck on std" trace with + // 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To + // avoid making 500,000 small allocations we allocate backing arrays 1 MiB + // at a time. + out := p.stacksData + if uint64(len(out)) < size { + out = make([]uint64, 1024*128) + } + p.stacksData = out[size:] + return out[:size:size] +} + +func (tr *Trace) STWReason(kindID uint64) STWReason { + if tr.Version < 21 { + if kindID == 0 || kindID == 1 { + return STWReason(kindID + 1) + } else { + return STWUnknown + } + } else if tr.Version == 21 { + if kindID < NumSTWReasons { + return STWReason(kindID) + } else { + return STWUnknown + } + } else { + return STWUnknown + } +} + +type STWReason int + +const ( + STWUnknown STWReason = 0 + STWGCMarkTermination STWReason = 1 + STWGCSweepTermination STWReason = 2 + STWWriteHeapDump STWReason = 3 + STWGoroutineProfile STWReason = 4 + STWGoroutineProfileCleanup STWReason = 5 + STWAllGoroutinesStackTrace STWReason = 6 + STWReadMemStats STWReason = 7 + STWAllThreadsSyscall STWReason = 8 + STWGOMAXPROCS STWReason = 9 + STWStartTrace STWReason = 10 + STWStopTrace STWReason = 11 + STWCountPagesInUse STWReason = 12 + STWReadMetricsSlow STWReason = 13 + STWReadMemStatsSlow STWReason = 14 + STWPageCachePagesLeaked STWReason = 15 + STWResetDebugLog STWReason = 16 + + NumSTWReasons = 17 +) diff --git a/src/internal/trace/v2/internal/oldtrace/parser_test.go b/src/internal/trace/v2/internal/oldtrace/parser_test.go new file mode 100644 index 0000000000..bc26c36a44 --- /dev/null +++ b/src/internal/trace/v2/internal/oldtrace/parser_test.go @@ -0,0 +1,151 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package oldtrace + +import ( + "bytes" + "internal/trace/v2/version" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCorruptedInputs(t *testing.T) { + // These inputs crashed parser previously. 
+ tests := []string{ + "gotrace\x00\x020", + "gotrace\x00Q00\x020", + "gotrace\x00T00\x020", + "gotrace\x00\xc3\x0200", + "go 1.5 trace\x00\x00\x00\x00\x020", + "go 1.5 trace\x00\x00\x00\x00Q00\x020", + "go 1.5 trace\x00\x00\x00\x00T00\x020", + "go 1.5 trace\x00\x00\x00\x00\xc3\x0200", + } + for _, data := range tests { + res, err := Parse(strings.NewReader(data), 5) + if err == nil || res.Events.Len() != 0 || res.Stacks != nil { + t.Fatalf("no error on input: %q", data) + } + } +} + +func TestParseCanned(t *testing.T) { + files, err := os.ReadDir("./testdata") + if err != nil { + t.Fatalf("failed to read ./testdata: %v", err) + } + for _, f := range files { + info, err := f.Info() + if err != nil { + t.Fatal(err) + } + if testing.Short() && info.Size() > 10000 { + continue + } + name := filepath.Join("./testdata", f.Name()) + data, err := os.ReadFile(name) + if err != nil { + t.Fatal(err) + } + r := bytes.NewReader(data) + v, err := version.ReadHeader(r) + if err != nil { + t.Errorf("failed to parse good trace %s: %s", f.Name(), err) + } + trace, err := Parse(r, v) + switch { + case strings.HasSuffix(f.Name(), "_good"): + if err != nil { + t.Errorf("failed to parse good trace %v: %v", f.Name(), err) + } + checkTrace(t, int(v), trace) + case strings.HasSuffix(f.Name(), "_unordered"): + if err != ErrTimeOrder { + t.Errorf("unordered trace is not detected %v: %v", f.Name(), err) + } + default: + t.Errorf("unknown input file suffix: %v", f.Name()) + } + } +} + +// checkTrace walks over a good trace and makes a bunch of additional checks +// that may not cause the parser to outright fail. +func checkTrace(t *testing.T, ver int, res Trace) { + for i := 0; i < res.Events.Len(); i++ { + ev := res.Events.Ptr(i) + if ver >= 21 { + if ev.Type == EvSTWStart && res.Strings[ev.Args[0]] == "unknown" { + t.Errorf("found unknown STW event; update stwReasonStrings?") + } + } + } +} + +func TestBuckets(t *testing.T) { + var evs Events + + const N = eventsBucketSize*3 + 123 + for i := 0; i < N; i++ { + evs.append(Event{Ts: Timestamp(i)}) + } + + if n := len(evs.buckets); n != 4 { + t.Fatalf("got %d buckets, want %d", n, 4) + } + + if n := evs.Len(); n != N { + t.Fatalf("got %d events, want %d", n, N) + } + + var n int + evs.All()(func(ev *Event) bool { + n++ + return true + }) + if n != N { + t.Fatalf("iterated over %d events, expected %d", n, N) + } + + const consume = eventsBucketSize + 50 + for i := 0; i < consume; i++ { + if _, ok := evs.Pop(); !ok { + t.Fatalf("iteration failed after %d events", i) + } + } + + if evs.buckets[0] != nil { + t.Fatalf("expected first bucket to have been dropped") + } + for i, b := range evs.buckets[1:] { + if b == nil { + t.Fatalf("expected bucket %d to be non-nil", i+1) + } + } + + if n := evs.Len(); n != N-consume { + t.Fatalf("got %d remaining elements, expected %d", n, N-consume) + } + + ev := evs.Ptr(0) + if ev.Ts != consume { + t.Fatalf("got event %d, expected %d", int(ev.Ts), consume) + } + + for { + _, ok := evs.Pop() + if !ok { + break + } + } + + for i, b := range evs.buckets { + if b != nil { + t.Fatalf("expected bucket %d to be nil", i) + } + } +} diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/fmt_1_21_pprof_good b/src/internal/trace/v2/internal/oldtrace/testdata/fmt_1_21_pprof_good new file mode 100644 index 0000000000..f3ea402fdb Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/fmt_1_21_pprof_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/http_1_19_good 
b/src/internal/trace/v2/internal/oldtrace/testdata/http_1_19_good new file mode 100644 index 0000000000..c1d519e7d5 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/http_1_19_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/http_1_21_good b/src/internal/trace/v2/internal/oldtrace/testdata/http_1_21_good new file mode 100644 index 0000000000..b3295f9e5d Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/http_1_21_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_11_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_11_good new file mode 100644 index 0000000000..6468d89290 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_11_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_19_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_19_good new file mode 100644 index 0000000000..13f59268e6 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_19_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_21_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_21_good new file mode 100644 index 0000000000..1ade5e0eb6 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_1_21_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_11_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_11_good new file mode 100644 index 0000000000..457f01a6cd Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_11_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_19_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_19_good new file mode 100644 index 0000000000..92d92789c4 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_19_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_21_good b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_21_good new file mode 100644 index 0000000000..fff46a9a07 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/stress_start_stop_1_21_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_11_good b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_11_good new file mode 100644 index 0000000000..f4edb67e65 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_11_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_19_good b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_19_good new file mode 100644 index 0000000000..1daa3b25a6 Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_19_good differ diff --git a/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_21_good b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_21_good new file mode 100644 index 0000000000..5c01a6405d Binary files /dev/null and b/src/internal/trace/v2/internal/oldtrace/testdata/user_task_region_1_21_good differ diff --git a/src/internal/trace/v2/oldtrace.go b/src/internal/trace/v2/oldtrace.go new file mode 100644 index 0000000000..b0586fe92a --- /dev/null +++ 
b/src/internal/trace/v2/oldtrace.go @@ -0,0 +1,567 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements conversion from old (Go 1.11–Go 1.21) traces to the Go +// 1.22 format. +// +// Most events have direct equivalents in 1.22, at worst requiring arguments to +// be reordered. Some events, such as GoWaiting need to look ahead for follow-up +// events to determine the correct translation. GoSyscall, which is an +// instantaneous event, gets turned into a 1 ns long pair of +// GoSyscallStart+GoSyscallEnd, unless we observe a GoSysBlock, in which case we +// emit a GoSyscallStart+GoSyscallEndBlocked pair with the correct duration +// (i.e. starting at the original GoSyscall). +// +// The resulting trace treats the old trace as a single, large generation, +// sharing a single evTable for all events. +// +// We use a new (compared to what was used for 'go tool trace' in earlier +// versions of Go) parser for old traces that is optimized for speed, low memory +// usage, and minimal GC pressure. It allocates events in batches so that even +// though we have to load the entire trace into memory, the conversion process +// shouldn't result in a doubling of memory usage, even if all converted events +// are kept alive, as we free batches once we're done with them. +// +// The conversion process is lossless. + +package trace + +import ( + "errors" + "fmt" + "internal/trace/v2/event" + "internal/trace/v2/event/go122" + "internal/trace/v2/internal/oldtrace" + "io" +) + +type oldTraceConverter struct { + trace oldtrace.Trace + evt *evTable + preInit bool + createdPreInit map[GoID]struct{} + events oldtrace.Events + extra []Event + extraArr [3]Event + tasks map[TaskID]taskState + seenProcs map[ProcID]struct{} + lastTs Time + procMs map[ProcID]ThreadID + lastStwReason uint64 + + inlineToStringID []uint64 + builtinToStringID []uint64 +} + +const ( + // Block reasons + sForever = iota + sPreempted + sGosched + sSleep + sChanSend + sChanRecv + sNetwork + sSync + sSyncCond + sSelect + sEmpty + sMarkAssistWait + + // STW kinds + sSTWUnknown + sSTWGCMarkTermination + sSTWGCSweepTermination + sSTWWriteHeapDump + sSTWGoroutineProfile + sSTWGoroutineProfileCleanup + sSTWAllGoroutinesStackTrace + sSTWReadMemStats + sSTWAllThreadsSyscall + sSTWGOMAXPROCS + sSTWStartTrace + sSTWStopTrace + sSTWCountPagesInUse + sSTWReadMetricsSlow + sSTWReadMemStatsSlow + sSTWPageCachePagesLeaked + sSTWResetDebugLog + + sLast +) + +func (it *oldTraceConverter) init(pr oldtrace.Trace) error { + it.trace = pr + it.preInit = true + it.createdPreInit = make(map[GoID]struct{}) + it.evt = &evTable{pcs: make(map[uint64]frame)} + it.events = pr.Events + it.extra = it.extraArr[:0] + it.tasks = make(map[TaskID]taskState) + it.seenProcs = make(map[ProcID]struct{}) + it.procMs = make(map[ProcID]ThreadID) + it.lastTs = -1 + + evt := it.evt + + // Convert from oldtracer's Strings map to our dataTable. + var max uint64 + for id, s := range pr.Strings { + evt.strings.insert(stringID(id), s) + if id > max { + max = id + } + } + pr.Strings = nil + + // Add all strings used for UserLog. In the old trace format, these were + // stored inline and didn't have IDs. We generate IDs for them. 
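The remapping that follows packs three ranges into a single string-ID space: the old trace's own string IDs first, then the inline strings (UserLog payloads), then the strings the converter itself needs. A minimal standalone sketch of that packing, with illustrative names rather than the real evTable API, before the overflow guard just below:

func packStringIDs(maxOld uint64, inline, builtin []string) map[uint64]string {
	// [0 .. maxOld]                      IDs taken verbatim from the old trace's string table
	// [maxOld+1 .. maxOld+len(inline)]   inline strings get freshly minted IDs
	// [.. +len(builtin)]                 converter-defined strings come last
	ids := make(map[uint64]string, len(inline)+len(builtin))
	for i, s := range inline {
		ids[maxOld+1+uint64(i)] = s
	}
	base := maxOld + uint64(len(inline))
	for i, s := range builtin {
		ids[base+1+uint64(i)] = s
	}
	return ids
}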
+ if max+uint64(len(pr.InlineStrings)) < max { + return errors.New("trace contains too many strings") + } + var addErr error + add := func(id stringID, s string) { + if err := evt.strings.insert(id, s); err != nil && addErr == nil { + addErr = err + } + } + for id, s := range pr.InlineStrings { + nid := max + 1 + uint64(id) + it.inlineToStringID = append(it.inlineToStringID, nid) + add(stringID(nid), s) + } + max += uint64(len(pr.InlineStrings)) + pr.InlineStrings = nil + + // Add strings that the converter emits explicitly. + if max+uint64(sLast) < max { + return errors.New("trace contains too many strings") + } + it.builtinToStringID = make([]uint64, sLast) + addBuiltin := func(c int, s string) { + nid := max + 1 + uint64(c) + it.builtinToStringID[c] = nid + add(stringID(nid), s) + } + addBuiltin(sForever, "forever") + addBuiltin(sPreempted, "preempted") + addBuiltin(sGosched, "runtime.Gosched") + addBuiltin(sSleep, "sleep") + addBuiltin(sChanSend, "chan send") + addBuiltin(sChanRecv, "chan receive") + addBuiltin(sNetwork, "network") + addBuiltin(sSync, "sync") + addBuiltin(sSyncCond, "sync.(*Cond).Wait") + addBuiltin(sSelect, "select") + addBuiltin(sEmpty, "") + addBuiltin(sMarkAssistWait, "GC mark assist wait for work") + addBuiltin(sSTWUnknown, "") + addBuiltin(sSTWGCMarkTermination, "GC mark termination") + addBuiltin(sSTWGCSweepTermination, "GC sweep termination") + addBuiltin(sSTWWriteHeapDump, "write heap dump") + addBuiltin(sSTWGoroutineProfile, "goroutine profile") + addBuiltin(sSTWGoroutineProfileCleanup, "goroutine profile cleanup") + addBuiltin(sSTWAllGoroutinesStackTrace, "all goroutine stack trace") + addBuiltin(sSTWReadMemStats, "read mem stats") + addBuiltin(sSTWAllThreadsSyscall, "AllThreadsSyscall") + addBuiltin(sSTWGOMAXPROCS, "GOMAXPROCS") + addBuiltin(sSTWStartTrace, "start trace") + addBuiltin(sSTWStopTrace, "stop trace") + addBuiltin(sSTWCountPagesInUse, "CountPagesInUse (test)") + addBuiltin(sSTWReadMetricsSlow, "ReadMetricsSlow (test)") + addBuiltin(sSTWReadMemStatsSlow, "ReadMemStatsSlow (test)") + addBuiltin(sSTWPageCachePagesLeaked, "PageCachePagesLeaked (test)") + addBuiltin(sSTWResetDebugLog, "ResetDebugLog (test)") + + if addErr != nil { + // This should be impossible but let's be safe. + return fmt.Errorf("couldn't add strings: %w", addErr) + } + + it.evt.strings.compactify() + + // Convert stacks. + for id, stk := range pr.Stacks { + evt.stacks.insert(stackID(id), stack{pcs: stk}) + } + + // OPT(dh): if we could share the frame type between this package and + // oldtrace we wouldn't have to copy the map. + for pc, f := range pr.PCs { + evt.pcs[pc] = frame{ + pc: pc, + funcID: stringID(f.Fn), + fileID: stringID(f.File), + line: uint64(f.Line), + } + } + pr.Stacks = nil + pr.PCs = nil + evt.stacks.compactify() + return nil +} + +// next returns the next event, io.EOF if there are no more events, or a +// descriptive error for invalid events. 
+func (it *oldTraceConverter) next() (Event, error) {
+	if len(it.extra) > 0 {
+		ev := it.extra[0]
+		it.extra = it.extra[1:]
+
+		if len(it.extra) == 0 {
+			it.extra = it.extraArr[:0]
+		}
+		// Two events aren't allowed to fall on the same timestamp in the new API,
+		// but this may happen when we produce EvGoStatus events.
+		if ev.base.time <= it.lastTs {
+			ev.base.time = it.lastTs + 1
+		}
+		it.lastTs = ev.base.time
+		return ev, nil
+	}
+
+	oev, ok := it.events.Pop()
+	if !ok {
+		return Event{}, io.EOF
+	}
+
+	ev, err := it.convertEvent(oev)
+
+	if err == errSkip {
+		return it.next()
+	} else if err != nil {
+		return Event{}, err
+	}
+
+	// Two events aren't allowed to fall on the same timestamp in the new API,
+	// but this may happen when we produce EvGoStatus events.
+	if ev.base.time <= it.lastTs {
+		ev.base.time = it.lastTs + 1
+	}
+	it.lastTs = ev.base.time
+	return ev, nil
+}
+
+var errSkip = errors.New("skip event")
+
+// convertEvent converts an event from the old trace format to zero or more
+// events in the new format. Most events translate 1 to 1. Some events don't
+// result in an event right away, in which case convertEvent returns errSkip.
+// Some events result in more than one new event; in this case, convertEvent
+// returns the first event and stores additional events in it.extra. When
+// encountering events that oldtrace shouldn't be able to emit, convertEvent
+// returns a descriptive error.
+func (it *oldTraceConverter) convertEvent(ev *oldtrace.Event) (OUT Event, ERR error) {
+	var mappedType event.Type
+	mappedArgs := ev.Args
+
+	switch ev.Type {
+	case oldtrace.EvGomaxprocs:
+		mappedType = go122.EvProcsChange
+		if it.preInit {
+			// The first EvGomaxprocs signals the end of trace initialization. At this point
+			// we've seen all goroutines that already existed when tracing began.
+			it.preInit = false
+			for gid := range it.createdPreInit {
+				// These are goroutines that already existed when tracing started but for which we
+				// received neither GoWaiting, GoInSyscall, nor GoStart. These are goroutines that are in
+				// the states _Gidle or _Grunnable.
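Conceptually, the pre-init bookkeeping this comment describes reduces to a pending set that is drained at the first EvGomaxprocs; a rough standalone sketch (hypothetical type, the converter itself keeps only the createdPreInit map), with the actual EvGoStatus synthesis continuing below:

type preInitGoroutines struct {
	pending map[uint64]struct{} // goroutines created before trace initialization finished
}

// created records a goroutine created before the first EvGomaxprocs.
func (p *preInitGoroutines) created(g uint64) { p.pending[g] = struct{}{} }

// observed drops a goroutine once GoStart, GoWaiting, or GoInSyscall reveals its real state.
func (p *preInitGoroutines) observed(g uint64) { delete(p.pending, g) }

// finish hands every still-unobserved goroutine to emit, which would synthesize
// a GoStatus(GoRunnable) event for it, then clears the set.
func (p *preInitGoroutines) finish(emit func(g uint64)) {
	for g := range p.pending {
		emit(g)
	}
	p.pending = nil
}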
+ it.extra = append(it.extra, Event{ + ctx: schedCtx{ + // G: GoID(gid), + G: NoGoroutine, + P: NoProc, + M: NoThread, + }, + table: it.evt, + base: baseEvent{ + typ: go122.EvGoStatus, + time: Time(ev.Ts), + args: [4]uint64{uint64(gid), ^uint64(0), uint64(go122.GoRunnable)}, + }, + }) + } + it.createdPreInit = nil + return Event{}, errSkip + } + case oldtrace.EvProcStart: + it.procMs[ProcID(ev.P)] = ThreadID(ev.Args[0]) + if _, ok := it.seenProcs[ProcID(ev.P)]; ok { + mappedType = go122.EvProcStart + mappedArgs = [4]uint64{uint64(ev.P)} + } else { + it.seenProcs[ProcID(ev.P)] = struct{}{} + mappedType = go122.EvProcStatus + mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcRunning)} + } + case oldtrace.EvProcStop: + if _, ok := it.seenProcs[ProcID(ev.P)]; ok { + mappedType = go122.EvProcStop + mappedArgs = [4]uint64{uint64(ev.P)} + } else { + it.seenProcs[ProcID(ev.P)] = struct{}{} + mappedType = go122.EvProcStatus + mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcIdle)} + } + case oldtrace.EvGCStart: + mappedType = go122.EvGCBegin + case oldtrace.EvGCDone: + mappedType = go122.EvGCEnd + case oldtrace.EvSTWStart: + sid := it.builtinToStringID[sSTWUnknown+it.trace.STWReason(ev.Args[0])] + it.lastStwReason = sid + mappedType = go122.EvSTWBegin + mappedArgs = [4]uint64{uint64(sid)} + case oldtrace.EvSTWDone: + mappedType = go122.EvSTWEnd + mappedArgs = [4]uint64{it.lastStwReason} + case oldtrace.EvGCSweepStart: + mappedType = go122.EvGCSweepBegin + case oldtrace.EvGCSweepDone: + mappedType = go122.EvGCSweepEnd + case oldtrace.EvGoCreate: + if it.preInit { + it.createdPreInit[GoID(ev.Args[0])] = struct{}{} + return Event{}, errSkip + } + mappedType = go122.EvGoCreate + case oldtrace.EvGoStart: + if it.preInit { + mappedType = go122.EvGoStatus + mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoRunning)} + delete(it.createdPreInit, GoID(ev.Args[0])) + } else { + mappedType = go122.EvGoStart + } + case oldtrace.EvGoStartLabel: + it.extra = []Event{{ + ctx: schedCtx{ + G: GoID(ev.G), + P: ProcID(ev.P), + M: it.procMs[ProcID(ev.P)], + }, + table: it.evt, + base: baseEvent{ + typ: go122.EvGoLabel, + time: Time(ev.Ts), + args: [4]uint64{ev.Args[2]}, + }, + }} + return Event{ + ctx: schedCtx{ + G: GoID(ev.G), + P: ProcID(ev.P), + M: it.procMs[ProcID(ev.P)], + }, + table: it.evt, + base: baseEvent{ + typ: go122.EvGoStart, + time: Time(ev.Ts), + args: ev.Args, + }, + }, nil + case oldtrace.EvGoEnd: + mappedType = go122.EvGoDestroy + case oldtrace.EvGoStop: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sForever]), uint64(ev.StkID)} + case oldtrace.EvGoSched: + mappedType = go122.EvGoStop + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sGosched]), uint64(ev.StkID)} + case oldtrace.EvGoPreempt: + mappedType = go122.EvGoStop + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sPreempted]), uint64(ev.StkID)} + case oldtrace.EvGoSleep: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSleep]), uint64(ev.StkID)} + case oldtrace.EvGoBlock: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sEmpty]), uint64(ev.StkID)} + case oldtrace.EvGoUnblock: + mappedType = go122.EvGoUnblock + case oldtrace.EvGoBlockSend: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanSend]), uint64(ev.StkID)} + case oldtrace.EvGoBlockRecv: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanRecv]), uint64(ev.StkID)} + case 
oldtrace.EvGoBlockSelect: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSelect]), uint64(ev.StkID)} + case oldtrace.EvGoBlockSync: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSync]), uint64(ev.StkID)} + case oldtrace.EvGoBlockCond: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSyncCond]), uint64(ev.StkID)} + case oldtrace.EvGoBlockNet: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sNetwork]), uint64(ev.StkID)} + case oldtrace.EvGoBlockGC: + mappedType = go122.EvGoBlock + mappedArgs = [4]uint64{uint64(it.builtinToStringID[sMarkAssistWait]), uint64(ev.StkID)} + case oldtrace.EvGoSysCall: + // Look for the next event for the same G to determine if the syscall + // blocked. + blocked := false + it.events.All()(func(nev *oldtrace.Event) bool { + if nev.G != ev.G { + return true + } + // After an EvGoSysCall, the next event on the same G will either be + // EvGoSysBlock to denote a blocking syscall, or some other event + // (or the end of the trace) if the syscall didn't block. + if nev.Type == oldtrace.EvGoSysBlock { + blocked = true + } + return false + }) + if blocked { + mappedType = go122.EvGoSyscallBegin + mappedArgs = [4]uint64{1: uint64(ev.StkID)} + } else { + // Convert the old instantaneous syscall event to a pair of syscall + // begin and syscall end and give it the shortest possible duration, + // 1ns. + out1 := Event{ + ctx: schedCtx{ + G: GoID(ev.G), + P: ProcID(ev.P), + M: it.procMs[ProcID(ev.P)], + }, + table: it.evt, + base: baseEvent{ + typ: go122.EvGoSyscallBegin, + time: Time(ev.Ts), + args: [4]uint64{1: uint64(ev.StkID)}, + }, + } + + out2 := Event{ + ctx: out1.ctx, + table: it.evt, + base: baseEvent{ + typ: go122.EvGoSyscallEnd, + time: Time(ev.Ts + 1), + args: [4]uint64{}, + }, + } + + it.extra = append(it.extra, out2) + return out1, nil + } + + case oldtrace.EvGoSysExit: + mappedType = go122.EvGoSyscallEndBlocked + case oldtrace.EvGoSysBlock: + return Event{}, errSkip + case oldtrace.EvGoWaiting: + mappedType = go122.EvGoStatus + mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoWaiting)} + delete(it.createdPreInit, GoID(ev.Args[0])) + case oldtrace.EvGoInSyscall: + mappedType = go122.EvGoStatus + // In the new tracer, GoStatus with GoSyscall knows what thread the + // syscall is on. In the old tracer, EvGoInSyscall doesn't contain that + // information and all we can do here is specify NoThread. + mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoSyscall)} + delete(it.createdPreInit, GoID(ev.Args[0])) + case oldtrace.EvHeapAlloc: + mappedType = go122.EvHeapAlloc + case oldtrace.EvHeapGoal: + mappedType = go122.EvHeapGoal + case oldtrace.EvGCMarkAssistStart: + mappedType = go122.EvGCMarkAssistBegin + case oldtrace.EvGCMarkAssistDone: + mappedType = go122.EvGCMarkAssistEnd + case oldtrace.EvUserTaskCreate: + mappedType = go122.EvUserTaskBegin + parent := ev.Args[1] + if parent == 0 { + parent = uint64(NoTask) + } + mappedArgs = [4]uint64{ev.Args[0], parent, ev.Args[2], uint64(ev.StkID)} + name, _ := it.evt.strings.get(stringID(ev.Args[2])) + it.tasks[TaskID(ev.Args[0])] = taskState{name: name, parentID: TaskID(ev.Args[1])} + case oldtrace.EvUserTaskEnd: + mappedType = go122.EvUserTaskEnd + // Event.Task expects the parent and name to be smuggled in extra args + // and as extra strings. 
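The smuggling works because EvUserTaskCreate records a taskState entry that EvUserTaskEnd later consumes; a condensed sketch of that remember-then-consume pattern (stand-in types, not the real taskState), with the actual lookup continuing below:

type taskInfo struct {
	name   string
	parent uint64
}

// beginTask remembers the task's name and parent under its ID.
func beginTask(tasks map[uint64]taskInfo, id, parent uint64, name string) {
	tasks[id] = taskInfo{name: name, parent: parent}
}

// endTask retrieves and deletes the entry so the end event can carry the
// parent ID and name as extra arguments and extra strings.
func endTask(tasks map[uint64]taskInfo, id uint64) (taskInfo, bool) {
	info, ok := tasks[id]
	if ok {
		delete(tasks, id)
	}
	return info, ok
}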
+ ts, ok := it.tasks[TaskID(ev.Args[0])] + if ok { + delete(it.tasks, TaskID(ev.Args[0])) + mappedArgs = [4]uint64{ + ev.Args[0], + ev.Args[1], + uint64(ts.parentID), + uint64(it.evt.addExtraString(ts.name)), + } + } else { + mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], uint64(NoTask), uint64(it.evt.addExtraString(""))} + } + case oldtrace.EvUserRegion: + switch ev.Args[1] { + case 0: // start + mappedType = go122.EvUserRegionBegin + case 1: // end + mappedType = go122.EvUserRegionEnd + } + mappedArgs = [4]uint64{ev.Args[0], ev.Args[2], uint64(ev.StkID)} + case oldtrace.EvUserLog: + mappedType = go122.EvUserLog + mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], it.inlineToStringID[ev.Args[3]], uint64(ev.StkID)} + case oldtrace.EvCPUSample: + mappedType = go122.EvCPUSample + // When emitted by the Go 1.22 tracer, CPU samples have 5 arguments: + // timestamp, M, P, G, stack. However, after they get turned into Event, + // they have the arguments stack, M, P, G. + // + // In Go 1.21, CPU samples did not have Ms. + mappedArgs = [4]uint64{uint64(ev.StkID), ^uint64(0), uint64(ev.P), ev.G} + default: + return Event{}, fmt.Errorf("unexpected event type %v", ev.Type) + } + + if oldtrace.EventDescriptions[ev.Type].Stack { + if stackIDs := go122.Specs()[mappedType].StackIDs; len(stackIDs) > 0 { + mappedArgs[stackIDs[0]-1] = uint64(ev.StkID) + } + } + + m := NoThread + if ev.P != -1 && ev.Type != oldtrace.EvCPUSample { + if t, ok := it.procMs[ProcID(ev.P)]; ok { + m = ThreadID(t) + } + } + if ev.Type == oldtrace.EvProcStop { + delete(it.procMs, ProcID(ev.P)) + } + g := GoID(ev.G) + if g == 0 { + g = NoGoroutine + } + out := Event{ + ctx: schedCtx{ + G: GoID(g), + P: ProcID(ev.P), + M: m, + }, + table: it.evt, + base: baseEvent{ + typ: mappedType, + time: Time(ev.Ts), + args: mappedArgs, + }, + } + return out, nil +} + +// convertOldFormat takes a fully loaded trace in the old trace format and +// returns an iterator over events in the new format. +func convertOldFormat(pr oldtrace.Trace) *oldTraceConverter { + it := &oldTraceConverter{} + it.init(pr) + return it +} diff --git a/src/internal/trace/v2/oldtrace_test.go b/src/internal/trace/v2/oldtrace_test.go new file mode 100644 index 0000000000..e315b48160 --- /dev/null +++ b/src/internal/trace/v2/oldtrace_test.go @@ -0,0 +1,89 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package trace_test + +import ( + "internal/trace/v2" + "internal/trace/v2/testtrace" + "io" + "os" + "path/filepath" + "testing" +) + +func TestOldtrace(t *testing.T) { + traces, err := filepath.Glob("./internal/oldtrace/testdata/*_good") + if err != nil { + t.Fatalf("failed to glob for tests: %s", err) + } + var testedUserRegions bool + for _, p := range traces { + p := p + testName, err := filepath.Rel("./internal/oldtrace/testdata", p) + if err != nil { + t.Fatalf("failed to relativize testdata path: %s", err) + } + t.Run(testName, func(t *testing.T) { + f, err := os.Open(p) + if err != nil { + t.Fatalf("failed to open test %q: %s", p, err) + } + defer f.Close() + + tr, err := trace.NewReader(f) + if err != nil { + t.Fatalf("failed to create reader: %s", err) + } + + v := testtrace.NewValidator() + v.Go121 = true + for { + ev, err := tr.ReadEvent() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("couldn't read converted event: %s", err) + } + if err := v.Event(ev); err != nil { + t.Fatalf("converted event did not validate; event: \n%s\nerror: %s", ev, err) + } + + if testName == "user_task_region_1_21_good" { + testedUserRegions = true + validRegions := map[string]struct{}{ + "post-existing region": struct{}{}, + "region0": struct{}{}, + "region1": struct{}{}, + } + // Check that we correctly convert user regions. These + // strings were generated by + // runtime/trace.TestUserTaskRegion, which is the basis for + // the user_task_region_* test cases. We only check for the + // Go 1.21 traces because earlier traces used different + // strings. + switch ev.Kind() { + case trace.EventRegionBegin, trace.EventRegionEnd: + if _, ok := validRegions[ev.Region().Type]; !ok { + t.Fatalf("converted event has unexpected region type:\n%s", ev) + } + case trace.EventTaskBegin, trace.EventTaskEnd: + if ev.Task().Type != "task0" { + t.Fatalf("converted event has unexpected task type name:\n%s", ev) + } + case trace.EventLog: + l := ev.Log() + if l.Task != 1 || l.Category != "key0" || l.Message != "0123456789abcdef" { + t.Fatalf("converted event has unexpected user log:\n%s", ev) + } + } + } + } + }) + } + if !testedUserRegions { + t.Fatal("didn't see expected test case user_task_region_1_21_good") + } +} diff --git a/src/internal/trace/v2/reader.go b/src/internal/trace/v2/reader.go index d24bf7920e..94a522ce66 100644 --- a/src/internal/trace/v2/reader.go +++ b/src/internal/trace/v2/reader.go @@ -12,6 +12,7 @@ import ( "strings" "internal/trace/v2/event/go122" + "internal/trace/v2/internal/oldtrace" "internal/trace/v2/version" ) @@ -25,6 +26,8 @@ type Reader struct { cpuSamples []cpuSample order ordering emittedSync bool + + go121Events *oldTraceConverter } // NewReader creates a new trace reader. @@ -34,20 +37,30 @@ func NewReader(r io.Reader) (*Reader, error) { if err != nil { return nil, err } - if v != version.Go122 { + switch v { + case version.Go111, version.Go119, version.Go121: + tr, err := oldtrace.Parse(br, v) + if err != nil { + return nil, err + } + return &Reader{ + go121Events: convertOldFormat(tr), + }, nil + case version.Go122: + return &Reader{ + r: br, + order: ordering{ + mStates: make(map[ThreadID]*mState), + pStates: make(map[ProcID]*pState), + gStates: make(map[GoID]*gState), + activeTasks: make(map[TaskID]taskState), + }, + // Don't emit a sync event when we first go to emit events. 
+ emittedSync: true, + }, nil + default: return nil, fmt.Errorf("unknown or unsupported version go 1.%d", v) } - return &Reader{ - r: br, - order: ordering{ - mStates: make(map[ThreadID]*mState), - pStates: make(map[ProcID]*pState), - gStates: make(map[GoID]*gState), - activeTasks: make(map[TaskID]taskState), - }, - // Don't emit a sync event when we first go to emit events. - emittedSync: true, - }, nil } // ReadEvent reads a single event from the stream. @@ -55,6 +68,15 @@ func NewReader(r io.Reader) (*Reader, error) { // If the stream has been exhausted, it returns an invalid // event and io.EOF. func (r *Reader) ReadEvent() (e Event, err error) { + if r.go121Events != nil { + ev, err := r.go121Events.next() + if err != nil { + // XXX do we have to emit an EventSync when the trace is done? + return Event{}, err + } + return ev, nil + } + // Go 1.22+ trace parsing algorithm. // // (1) Read in all the batches for the next generation from the stream. diff --git a/src/internal/trace/v2/testdata/fuzz/FuzzReader/4055b17cae1a3443 b/src/internal/trace/v2/testdata/fuzz/FuzzReader/4055b17cae1a3443 new file mode 100644 index 0000000000..ea5417c78a --- /dev/null +++ b/src/internal/trace/v2/testdata/fuzz/FuzzReader/4055b17cae1a3443 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("go 1.11 trace\x00\x00\x00A00\x020$0") diff --git a/src/internal/trace/v2/testtrace/validation.go b/src/internal/trace/v2/testtrace/validation.go index 021c7785fd..1dba366f5b 100644 --- a/src/internal/trace/v2/testtrace/validation.go +++ b/src/internal/trace/v2/testtrace/validation.go @@ -21,6 +21,7 @@ type Validator struct { ranges map[trace.ResourceID][]string tasks map[trace.TaskID]string seenSync bool + Go121 bool } type schedContext struct { @@ -160,7 +161,7 @@ func (v *Validator) Event(ev trace.Event) error { } // Validate sched context. if new.Executing() { - ctx := v.getOrCreateThread(e, ev.Thread()) + ctx := v.getOrCreateThread(e, ev, ev.Thread()) if ctx != nil { if ctx.G != trace.NoGoroutine && ctx.G != id { e.Errorf("tried to run goroutine %d when one was already executing (%d) on thread %d", id, ctx.G, ev.Thread()) @@ -213,7 +214,7 @@ func (v *Validator) Event(ev trace.Event) error { } // Validate sched context. if new.Executing() { - ctx := v.getOrCreateThread(e, ev.Thread()) + ctx := v.getOrCreateThread(e, ev, ev.Thread()) if ctx != nil { if ctx.P != trace.NoProc && ctx.P != id { e.Errorf("tried to run proc %d when one was already executing (%d) on thread %d", id, ctx.P, ev.Thread()) @@ -316,8 +317,25 @@ func (v *Validator) deleteRange(r trace.ResourceID, name string) { v.ranges[r] = slices.Delete(ranges, i, i+1) } -func (v *Validator) getOrCreateThread(e *errAccumulator, m trace.ThreadID) *schedContext { - if m == trace.NoThread { +func (v *Validator) getOrCreateThread(e *errAccumulator, ev trace.Event, m trace.ThreadID) *schedContext { + lenient := func() bool { + // Be lenient about GoUndetermined -> GoSyscall transitions if they + // originate from an old trace. These transitions lack thread + // information in trace formats older than 1.22. 
+ if !v.Go121 { + return false + } + if ev.Kind() != trace.EventStateTransition { + return false + } + tr := ev.StateTransition() + if tr.Resource.Kind != trace.ResourceGoroutine { + return false + } + from, to := tr.Goroutine() + return from == trace.GoUndetermined && to == trace.GoSyscall + } + if m == trace.NoThread && !lenient() { e.Errorf("must have thread, but thread ID is none") return nil } diff --git a/src/internal/trace/v2/version/version.go b/src/internal/trace/v2/version/version.go index 28189f80db..f42dbb9eef 100644 --- a/src/internal/trace/v2/version/version.go +++ b/src/internal/trace/v2/version/version.go @@ -16,11 +16,20 @@ import ( type Version uint32 const ( + Go111 Version = 11 + Go119 Version = 19 + Go121 Version = 21 Go122 Version = 22 Current = Go122 ) var versions = map[Version][]event.Spec{ + // Go 1.11–1.21 use a different parser and are only set here for the sake of + // Version.Valid. + Go111: nil, + Go119: nil, + Go121: nil, + Go122: go122.Specs(), }
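Taken together, this change means a trace recorded by Go 1.11–1.21 can be consumed through exactly the same Reader loop as a Go 1.22 trace; a minimal usage sketch, assuming a placeholder trace file name and noting that internal/trace/v2 is only importable from within the standard library tree:

package main

import (
	"fmt"
	"io"
	"log"
	"os"

	"internal/trace/v2" // internal package: usable only inside the Go source tree
)

func main() {
	f, err := os.Open("old.trace") // placeholder path for a Go 1.11-1.21 trace
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// NewReader reads the header, detects the version, and for old traces
	// converts events to the Go 1.22 representation on the fly.
	r, err := trace.NewReader(f)
	if err != nil {
		log.Fatal(err)
	}
	for {
		ev, err := r.ReadEvent()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(ev.Kind())
	}
}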