type Event struct {
Off int // offset in input file (for debugging and error reporting)
Type byte // one of Ev*
+ Seq int64 // sequence number
Ts int64 // timestamp in nanoseconds
P int // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
G uint64 // G on which the event happened
StkID uint64 // unique stack ID
Stk []*Frame // stack trace (can be empty)
- Args [2]uint64 // event-type-specific arguments
+ Args [3]uint64 // event-type-specific arguments
// linked event (can be nil), depends on event type:
// for GCStart: the GCStop
// for GCScanStart: the GCScanDone
// readTrace does wire-format parsing and verification.
// It does not care about specific event types and argument meaning.
func readTrace(r io.Reader) ([]rawEvent, error) {
// Read and validate trace header.
- var buf [8]byte
+ var buf [16]byte
off, err := r.Read(buf[:])
- if off != 8 || err != nil {
+ if off != 16 || err != nil {
return nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
}
- if bytes.Compare(buf[:], []byte("gotrace\x00")) != 0 {
+ if !bytes.Equal(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) {
return nil, fmt.Errorf("not a trace file")
}
}
off += n
typ := buf[0] << 2 >> 2 // event type is in the low 6 bits of the header byte
- narg := buf[0]>>6 + 1
+ narg := buf[0] >> 6 // argument count is in the top 2 bits
ev := rawEvent{typ: typ, off: off0}
- if narg <= 3 {
- for i := 0; i < int(narg); i++ {
+ if narg < 3 {
+ for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
var v uint64
v, off, err = readVal(r, off)
if err != nil {
return nil, err
}
ev.args = append(ev.args, v)
}
} else {
- // If narg == 4, the first value is length of the event in bytes.
+ // If narg == 3, the first value is length of the event in bytes.
var v uint64
v, off, err = readVal(r, off)
if err != nil {
return nil, err
}
// parseEvents transforms raw events into events.
// It does analyze and verify per-event-type arguments.
func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
- var ticksPerSec, lastTs int64
+ var ticksPerSec, lastSeq, lastTs int64
var lastG, timerGoid uint64
var lastP int
lastGs := make(map[int]uint64) // last goroutine running on P
narg := len(desc.Args)
if desc.Stack {
narg++ // the stack ID is appended after the explicit arguments
}
if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
+ narg++ // sequence number
narg++ // timestamp
}
if len(raw.args) != narg {
err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
desc.Name, raw.off, narg, len(raw.args))
return
}
switch raw.typ {
case EvBatch:
lastGs[lastP] = lastG
lastP = int(raw.args[0])
lastG = lastGs[lastP]
- lastTs = int64(raw.args[1])
+ lastSeq = int64(raw.args[1])
+ lastTs = int64(raw.args[2])
case EvFrequency:
ticksPerSec = int64(raw.args[0])
if ticksPerSec <= 0 {
err = fmt.Errorf("EvFrequency contains invalid frequency %v", ticksPerSec)
return
}
default:
e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
- e.Ts = lastTs + int64(raw.args[0])
+ e.Seq = lastSeq + int64(raw.args[0])
+ e.Ts = lastTs + int64(raw.args[1])
+ lastSeq = e.Seq
lastTs = e.Ts
for i := range desc.Args {
- e.Args[i] = raw.args[i+1]
+ e.Args[i] = raw.args[i+2]
}
if desc.Stack {
- e.StkID = raw.args[len(desc.Args)+1]
+ e.StkID = raw.args[len(desc.Args)+2]
}
switch raw.typ {
case EvGoStart:
EvGoSysBlock:
lastG = 0
case EvGoSysExit:
- if e.Args[1] != 0 {
- // EvGoSysExit emission is delayed until the thread has a P.
- // Give it the real timestamp.
- e.Ts = int64(e.Args[1])
+ // EvGoSysExit emission is delayed until the thread has a P.
+ // Give it the real sequence number and time stamp.
+ e.Seq = int64(e.Args[1])
+ if e.Args[2] != 0 {
+ e.Ts = int64(e.Args[2])
}
}
events = append(events, e)
}
}
- // Sort by time and translate cpu ticks to real time.
- // Use stable sort because adjacent events in a batch can have the same timestamp
- // (this was observed on some VMs).
- sort.Stable(eventList(events))
+ // Sort by sequence number and translate cpu ticks to real time.
+ sort.Sort(eventList(events))
if ticksPerSec == 0 {
err = fmt.Errorf("no EvFrequency event")
return
}
return newEvents, nil
}
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+var ErrTimeOrder = fmt.Errorf("time stamps out of order")
+
// postProcessTrace does inter-event verification and information restoration.
// The resulting trace is guaranteed to be consistent
// (for example, a P does not run two Gs at the same time, or a G is indeed
// blocked before it is woken up).
// TODO(dvyukov): restore stacks for EvGoStart events.
// TODO(dvyukov): test that all EvGoStart events have non-nil Link.
+ // Last, after all the other consistency checks,
+ // make sure time stamps respect sequence numbers.
+ // The tests will skip (not fail) the test case if they see this error,
+ // so check everything else that could possibly be wrong first.
+ lastTs := int64(0)
+ for _, ev := range events {
+ if ev.Ts < lastTs {
+ return ErrTimeOrder
+ }
+ lastTs = ev.Ts
+ }
+
return nil
}
var n int
n, err = r.Read(buf[:])
if err != nil || n != 1 {
- return 0, 0, fmt.Errorf("failed to read trace at offset: read %v, error %v", off0, n, err)
+ return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err)
}
off++
v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
}
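
The loop above decodes each number as a little-endian base-128 varint: seven payload bits per byte, with the high bit set on every byte except the last. A minimal round-trip sketch of that format; appendVal and parseVal are illustrative names standing in for the runtime's traceAppend and the parser's readVal:

package main

import "fmt"

// appendVal encodes v as a little-endian base-128 varint.
func appendVal(buf []byte, v uint64) []byte {
	for ; v >= 0x80; v >>= 7 {
		buf = append(buf, 0x80|byte(v))
	}
	return append(buf, byte(v))
}

// parseVal decodes one varint and reports how many bytes it consumed.
func parseVal(buf []byte) (v uint64, n int) {
	for i := 0; ; i++ {
		b := buf[n]
		n++
		v |= uint64(b&0x7f) << (uint(i) * 7)
		if b&0x80 == 0 {
			return v, n
		}
	}
}

func main() {
	buf := appendVal(nil, 1234567)
	v, n := parseVal(buf)
	fmt.Println(v, n) // 1234567 3
}
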
func (l eventList) Less(i, j int) bool {
- return l[i].Ts < l[j].Ts
+ return l[i].Seq < l[j].Seq
}
func (l eventList) Swap(i, j int) {
Args []string
}{
EvNone: {"None", false, []string{}},
- EvBatch: {"Batch", false, []string{"p", "ticks"}},
- EvFrequency: {"Frequency", false, []string{"freq"}},
+ EvBatch: {"Batch", false, []string{"p", "seq", "ticks"}},
+ EvFrequency: {"Frequency", false, []string{"freq", "unused"}},
EvStack: {"Stack", false, []string{"id", "siz"}},
EvGomaxprocs: {"Gomaxprocs", true, []string{"procs"}},
EvProcStart: {"ProcStart", false, []string{"thread"}},
EvGoBlockCond: {"GoBlockCond", true, []string{}},
EvGoBlockNet: {"GoBlockNet", true, []string{}},
EvGoSysCall: {"GoSysCall", true, []string{}},
- EvGoSysExit: {"GoSysExit", false, []string{"g", "ts"}},
+ EvGoSysExit: {"GoSysExit", false, []string{"g", "seq", "ts"}},
EvGoSysBlock: {"GoSysBlock", false, []string{}},
EvGoWaiting: {"GoWaiting", false, []string{"g"}},
EvGoInSyscall: {"GoInSyscall", false, []string{"g"}},
EvHeapAlloc: {"HeapAlloc", false, []string{"mem"}},
EvNextGC: {"NextGC", false, []string{"mem"}},
- EvTimerGoroutine: {"TimerGoroutine", false, []string{"g"}},
+ EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
EvFutileWakeup: {"FutileWakeup", false, []string{}},
}
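
As context for the argument lists in this table: every event starts with a single header byte, the event type in the low six bits and an argument count in the top two bits, where a count of 3 means an explicit byte length precedes the arguments (the narg handling in readTrace above). A small sketch of that packing; pack and unpack are illustrative names, and the writer-side traceArgCountShift is assumed to be 6 to match the parser's shifts:

package main

import "fmt"

// pack builds an event header byte: event type in the low 6 bits,
// argument count in the top 2 bits (3 means "length-prefixed").
func pack(typ, narg byte) byte { return typ | narg<<6 }

// unpack reverses pack with the same shifts readTrace uses.
func unpack(b byte) (typ, narg byte) {
	return b << 2 >> 2, b >> 6
}

func main() {
	b := pack(5, 2) // hypothetical event type 5 with two inline arguments
	typ, narg := unpack(b)
	fmt.Println(typ, narg) // 5 2
}
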
"gotrace\x00Q00\x020",
"gotrace\x00T00\x020",
"gotrace\x00\xc3\x0200",
+ "go 1.5 trace\x00\x00\x00\x00\x020",
+ "go 1.5 trace\x00\x00\x00\x00Q00\x020",
+ "go 1.5 trace\x00\x00\x00\x00T00\x020",
+ "go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
}
for _, data := range tests {
events, err := Parse(strings.NewReader(data))
// GoSysExit has to happen when we have a P, but before GoStart.
// So we emit it here.
if gp.syscallsp != 0 && gp.sysblocktraced {
- traceGoSysExit(gp.sysexitticks)
+ traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
}
traceGoStart()
}
}
_g_.sysexitticks = 0
+ _g_.sysexitseq = 0
if trace.enabled {
// Wait till traceGoSysBlock event is emitted.
// This ensures consistency of the trace (the goroutine is started after it is blocked).
// Tracing code can invoke write barriers that cannot run without a P.
// So instead we remember the syscall exit time and sequence number and emit the event
// in execute when we have a P.
- _g_.sysexitticks = cputicks()
+ _g_.sysexitseq, _g_.sysexitticks = tracestamp()
}
_g_.m.locks--
// Denote blocking of the new syscall.
traceGoSysBlock(_g_.m.p.ptr())
// Denote completion of the current syscall.
- traceGoSysExit(0)
+ traceGoSysExit(tracestamp())
})
}
_g_.m.p.ptr().syscalltick++
osyield()
}
}
- traceGoSysExit(0)
+ traceGoSysExit(tracestamp())
}
})
if ok {
waitsince int64 // approx time when the g became blocked
waitreason string // if status==Gwaiting
schedlink guintptr
- preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
- paniconfault bool // panic (instead of crash) on unexpected fault address
- preemptscan bool // preempted g does scan for gc
- gcscandone bool // g has scanned stack; protected by _Gscan bit in status
- gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
- throwsplit bool // must not split stack
- raceignore int8 // ignore race detection events
- sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
- sysexitticks int64 // cputicks when syscall has returned (for tracing)
- m *m // for debuggers, but offset not hard-coded
+ preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
+ paniconfault bool // panic (instead of crash) on unexpected fault address
+ preemptscan bool // preempted g does scan for gc
+ gcscandone bool // g has scanned stack; protected by _Gscan bit in status
+ gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
+ throwsplit bool // must not split stack
+ raceignore int8 // ignore race detection events
+ sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
+ sysexitticks int64 // cputicks when syscall has returned (for tracing)
+ sysexitseq uint64 // trace seq when syscall has returned (for tracing)
+ m *m // for debuggers, but offset not hard-coded
lockedm *m
sig uint32
writebuf []byte
headerWritten bool // whether ReadTrace has emitted trace header
footerWritten bool // whether ReadTrace has emitted trace footer
shutdownSema uint32 // used to wait for ReadTrace completion
+ seqStart uint64 // sequence number when tracing was started
ticksStart int64 // cputicks when tracing was started
ticksEnd int64 // cputicks when tracing was stopped
timeStart int64 // nanotime when tracing was started
buf *traceBuf // global trace buffer, used when running without a p
}
+var traceseq uint64 // global trace sequence number
+
+// tracestamp returns a consistent sequence number, time stamp pair
+// for use in a trace. We need to make sure that time stamp ordering
+// (assuming synchronized CPUs) and sequence ordering match.
+// To do that, we increment traceseq, grab ticks, and increment traceseq again.
+// We treat odd traceseq as a sign that another thread is in the middle
+// of the sequence and spin until it is done.
+// Not splitting stack to avoid preemption, just in case the call sites
+// that used to call xadd64 and cputicks are sensitive to that.
+//go:nosplit
+func tracestamp() (seq uint64, ts int64) {
+ seq = atomicload64(&traceseq)
+ for seq&1 != 0 || !cas64(&traceseq, seq, seq+1) {
+ seq = atomicload64(&traceseq)
+ }
+ ts = cputicks()
+ atomicstore64(&traceseq, seq+2)
+ return seq >> 1, ts
+}
+
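To make the protocol concrete, here is a portable sketch of the same odd/even discipline, with sync/atomic standing in for the runtime's atomicload64/cas64/atomicstore64 and a monotonic time.Since standing in for cputicks; an illustration under those substitutions, not the runtime code. Because the clock is read strictly inside the window in which traceseq is odd, sequence order can never contradict time order:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

var traceseq uint64    // even when idle, odd while a stamp is being taken
var epoch = time.Now() // monotonic base, standing in for cputicks

func tracestamp() (seq uint64, ts int64) {
	seq = atomic.LoadUint64(&traceseq)
	for seq&1 != 0 || !atomic.CompareAndSwapUint64(&traceseq, seq, seq+1) {
		seq = atomic.LoadUint64(&traceseq)
	}
	ts = int64(time.Since(epoch))
	atomic.StoreUint64(&traceseq, seq+2)
	return seq >> 1, ts
}

func main() {
	const goroutines, per = 4, 1000
	stamps := make([]int64, goroutines*per)
	var wg sync.WaitGroup
	for g := 0; g < goroutines; g++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < per; i++ {
				seq, ts := tracestamp()
				stamps[seq] = ts // seq values are unique, so no lock is needed
			}
		}()
	}
	wg.Wait()
	for i := 1; i < len(stamps); i++ {
		if stamps[i] < stamps[i-1] {
			panic("sequence order does not match time order")
		}
	}
	fmt.Println("time stamps are monotone in sequence order")
}
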
// traceBufHeader is per-P tracing buffer.
type traceBufHeader struct {
link *traceBuf // in trace.empty/full
+ lastSeq uint64 // sequence number of last event
lastTicks uint64 // when we wrote the last event
buf []byte // trace data, always points to traceBuf.arr
stk [traceStackSize]uintptr // scratch buffer for traceback
return errorString("tracing is already enabled")
}
- trace.ticksStart = cputicks()
+ trace.seqStart, trace.ticksStart = tracestamp()
trace.timeStart = nanotime()
trace.headerWritten = false
trace.footerWritten = false
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
- return []byte("gotrace\x00")
+ return []byte("go 1.5 trace\x00\x00\x00\x00")
}
// Wait for new data.
if trace.fullHead == nil && !trace.shutdown {
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
+ data = traceAppend(data, 0)
if timers.gp != nil {
data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
data = traceAppend(data, uint64(timers.gp.goid))
+ data = traceAppend(data, 0)
}
return data
}
return
}
buf := *bufp
- const maxSize = 2 + 4*traceBytesPerNumber // event type, length, timestamp, stack id and two add params
+ const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two additional args
if buf == nil || cap(buf.buf)-len(buf.buf) < maxSize {
buf = traceFlush(buf)
*bufp = buf
}
- ticks := uint64(cputicks()) / traceTickDiv
+ seq, ticksraw := tracestamp()
+ seqDiff := seq - buf.lastSeq
+ ticks := uint64(ticksraw) / traceTickDiv
tickDiff := ticks - buf.lastTicks
if len(buf.buf) == 0 {
data := buf.buf
data = append(data, traceEvBatch|1<<traceArgCountShift)
data = traceAppend(data, uint64(pid))
+ data = traceAppend(data, seq)
data = traceAppend(data, ticks)
buf.buf = data
+ seqDiff = 0
tickDiff = 0
}
+ buf.lastSeq = seq
buf.lastTicks = ticks
narg := byte(len(args))
if skip >= 0 {
narg++ // the stack ID counts as an extra argument
}
if narg == 3 { // 3 in the two header bits means "explicit length follows"
data = append(data, 0) // reserve the length byte, filled in at the end
lenp = &data[len(data)-1]
}
+ data = traceAppend(data, seqDiff)
data = traceAppend(data, tickDiff)
for _, a := range args {
data = traceAppend(data, a)
traceEvent(traceEvGoSysCall, 4)
}
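
The seqDiff/tickDiff bookkeeping in traceEvent above is a cumulative delta encoding: the EvBatch header carries the first event's absolute sequence number and tick count, and each subsequent event stores only its distance from the previous one, which keeps the varints short. A round-trip sketch of that scheme under this reading; batch, encode and decode are illustrative names:

package main

import "fmt"

// batch models an EvBatch header plus per-event (seqDiff, tickDiff) pairs.
type batch struct {
	seq, ticks uint64      // absolute values from the batch header
	diffs      [][2]uint64 // one pair per event; the first pair is {0, 0}
}

func encode(seqs, ticks []uint64) batch {
	b := batch{seq: seqs[0], ticks: ticks[0]}
	lastSeq, lastTicks := seqs[0], ticks[0]
	for i := range seqs {
		b.diffs = append(b.diffs, [2]uint64{seqs[i] - lastSeq, ticks[i] - lastTicks})
		lastSeq, lastTicks = seqs[i], ticks[i]
	}
	return b
}

func decode(b batch) (seqs, ticks []uint64) {
	lastSeq, lastTicks := b.seq, b.ticks
	for _, d := range b.diffs {
		lastSeq += d[0]   // mirrors e.Seq = lastSeq + int64(raw.args[0])
		lastTicks += d[1] // mirrors e.Ts = lastTs + int64(raw.args[1])
		seqs = append(seqs, lastSeq)
		ticks = append(ticks, lastTicks)
	}
	return
}

func main() {
	seqs := []uint64{100, 101, 105, 140}
	ticks := []uint64{9000, 9010, 9012, 9999}
	gotSeqs, gotTicks := decode(encode(seqs, ticks))
	fmt.Println(gotSeqs, gotTicks) // the original values round-trip
}
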
-func traceGoSysExit(ts int64) {
- if ts != 0 && ts < trace.ticksStart {
+func traceGoSysExit(seq uint64, ts int64) {
+ if int64(seq)-int64(trace.seqStart) < 0 {
// The sequence number was obtained during a previous tracing session; ignore the event.
return
}
- traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), uint64(ts)/traceTickDiv)
+ traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
}
func traceGoSysBlock(pp *p) {
<-pipeReadDone
Stop()
- events, _, err := parseTrace(buf)
+ events, _, err := parseTrace(t, buf)
if err != nil {
t.Fatalf("failed to parse trace: %v", err)
}
}
Stop()
_, err := trace.Parse(buf)
+ if err == trace.ErrTimeOrder {
+ t.Skipf("skipping trace: %v", err)
+ }
if err != nil {
t.Fatalf("failed to parse trace: %v", err)
}
}
-func parseTrace(r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
events, err := trace.Parse(r)
+ if err == trace.ErrTimeOrder {
+ t.Skipf("skipping trace: %v", err)
+ }
if err != nil {
return nil, nil, err
}
runtime.GOMAXPROCS(procs)
Stop()
- _, _, err = parseTrace(buf)
+ _, _, err = parseTrace(t, buf)
if err != nil {
t.Fatalf("failed to parse trace: %v", err)
}
}
time.Sleep(time.Millisecond)
Stop()
- if _, _, err := parseTrace(buf); err != nil {
+ if _, _, err := parseTrace(t, buf); err != nil {
t.Fatalf("failed to parse trace: %v", err)
}
}
done.Wait()
Stop()
- events, _, err := parseTrace(buf)
+ events, _, err := parseTrace(t, buf)
if err != nil {
t.Fatalf("failed to parse trace: %v", err)
}