runtime/trace: record event sequence numbers explicitly

author Russ Cox <rsc@golang.org>

Thu, 23 Jul 2015 18:01:03 +0000 (14:01 -0400)

committer Russ Cox <rsc@golang.org>

Wed, 29 Jul 2015 22:32:14 +0000 (22:32 +0000)
author Russ Cox <rsc@golang.org>
Thu, 23 Jul 2015 18:01:03 +0000 (14:01 -0400)
committer Russ Cox <rsc@golang.org>
Wed, 29 Jul 2015 22:32:14 +0000 (22:32 +0000)
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go

index bfe6b7301545baf6d7e121c340f17a5ab914439b..294a52f9764bb96639958edde66b24d010d220ff 100644 (file)
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -20,12 +20,13 @@ import (
  type Event struct {
         Off   int       // offset in input file (for debugging and error reporting)
         Type  byte      // one of Ev*
+       Seq   int64     // sequence number
         Ts    int64     // timestamp in nanoseconds
         P     int       // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
         G     uint64    // G on which the event happened
         StkID uint64    // unique stack ID
         Stk   []*Frame  // stack trace (can be empty)
-       Args  [2]uint64 // event-type-specific arguments
+       Args  [3]uint64 // event-type-specific arguments
         // linked event (can be nil), depends on event type:
         // for GCStart: the GCStop
         // for GCScanStart: the GCScanDone
@@ -88,12 +89,12 @@ type rawEvent struct {
  // It does not care about specific event types and argument meaning.
  func readTrace(r io.Reader) ([]rawEvent, error) {
         // Read and validate trace header.
-       var buf [8]byte
+       var buf [16]byte
         off, err := r.Read(buf[:])
-       if off != 8 || err != nil {
+       if off != 16 || err != nil {
                 return nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
         }
-       if bytes.Compare(buf[:], []byte("gotrace\x00")) != 0 {
+       if bytes.Compare(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) != 0 {
                 return nil, fmt.Errorf("not a trace file")
         }
  
@@ -111,10 +112,10 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
                 }
                 off += n
                 typ := buf[0] << 2 >> 2
-               narg := buf[0]>>6 + 1
+               narg := buf[0] >> 6
                 ev := rawEvent{typ: typ, off: off0}
-               if narg <= 3 {
-                       for i := 0; i < int(narg); i++ {
+               if narg < 3 {
+                       for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
                                 var v uint64
                                 v, off, err = readVal(r, off)
                                 if err != nil {
@@ -123,7 +124,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
                                 ev.args = append(ev.args, v)
                         }
                 } else {
-                       // If narg == 4, the first value is length of the event in bytes.
+                       // If narg == 3, the first value is length of the event in bytes.
                         var v uint64
                         v, off, err = readVal(r, off)
                         if err != nil {
@@ -150,7 +151,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
  // Parse events transforms raw events into events.
  // It does analyze and verify per-event-type arguments.
  func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
-       var ticksPerSec, lastTs int64
+       var ticksPerSec, lastSeq, lastTs int64
         var lastG, timerGoid uint64
         var lastP int
         lastGs := make(map[int]uint64) // last goroutine running on P
@@ -171,6 +172,7 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
                                 narg++
                         }
                         if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
+                               narg++ // sequence number
                                 narg++ // timestamp
                         }
                         if len(raw.args) != narg {
@@ -184,7 +186,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
                         lastGs[lastP] = lastG
                         lastP = int(raw.args[0])
                         lastG = lastGs[lastP]
-                       lastTs = int64(raw.args[1])
+                       lastSeq = int64(raw.args[1])
+                       lastTs = int64(raw.args[2])
                 case EvFrequency:
                         ticksPerSec = int64(raw.args[0])
                         if ticksPerSec <= 0 {
@@ -221,13 +224,15 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
                         }
                 default:
                         e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
-                       e.Ts = lastTs + int64(raw.args[0])
+                       e.Seq = lastSeq + int64(raw.args[0])
+                       e.Ts = lastTs + int64(raw.args[1])
+                       lastSeq = e.Seq
                         lastTs = e.Ts
                         for i := range desc.Args {
-                               e.Args[i] = raw.args[i+1]
+                               e.Args[i] = raw.args[i+2]
                         }
                         if desc.Stack {
-                               e.StkID = raw.args[len(desc.Args)+1]
+                               e.StkID = raw.args[len(desc.Args)+2]
                         }
                         switch raw.typ {
                         case EvGoStart:
@@ -241,10 +246,11 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
                                 EvGoSysBlock:
                                 lastG = 0
                         case EvGoSysExit:
-                               if e.Args[1] != 0 {
-                                       // EvGoSysExit emission is delayed until the thread has a P.
-                                       // Give it the real timestamp.
-                                       e.Ts = int64(e.Args[1])
+                               // EvGoSysExit emission is delayed until the thread has a P.
+                               // Give it the real sequence number and time stamp.
+                               e.Seq = int64(e.Args[1])
+                               if e.Args[2] != 0 {
+                                       e.Ts = int64(e.Args[2])
                                 }
                         }
                         events = append(events, e)
@@ -262,10 +268,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
                 }
         }
  
-       // Sort by time and translate cpu ticks to real time.
-       // Use stable sort because adjacent events in a batch can have the same timestamp
-       // (this was observed on some VMs).
-       sort.Stable(eventList(events))
+       // Sort by sequence number and translate cpu ticks to real time.
+       sort.Sort(eventList(events))
         if ticksPerSec == 0 {
                 err = fmt.Errorf("no EvFrequency event")
                 return
@@ -341,6 +345,10 @@ func removeFutile(events []*Event) ([]*Event, error) {
         return newEvents, nil
  }
  
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+var ErrTimeOrder = fmt.Errorf("time stamps out of order")
+
  // postProcessTrace does inter-event verification and information restoration.
  // The resulting trace is guaranteed to be consistent
  // (for example, a P does not run two Gs at the same time, or a G is indeed
@@ -555,6 +563,18 @@ func postProcessTrace(events []*Event) error {
         // TODO(dvyukov): restore stacks for EvGoStart events.
         // TODO(dvyukov): test that all EvGoStart events has non-nil Link.
  
+       // Last, after all the other consistency checks,
+       // make sure time stamps respect sequence numbers.
+       // The tests will skip (not fail) the test case if they see this error,
+       // so check everything else that could possibly be wrong first.
+       lastTs := int64(0)
+       for _, ev := range events {
+               if ev.Ts < lastTs {
+                       return ErrTimeOrder
+               }
+               lastTs = ev.Ts
+       }
+
         return nil
  }
  
@@ -639,7 +659,7 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
                 var n int
                 n, err = r.Read(buf[:])
                 if err != nil || n != 1 {
-                       return 0, 0, fmt.Errorf("failed to read trace at offset: read %v, error %v", off0, n, err)
+                       return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err)
                 }
                 off++
                 v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
@@ -657,7 +677,7 @@ func (l eventList) Len() int {
  }
  
  func (l eventList) Less(i, j int) bool {
-       return l[i].Ts < l[j].Ts
+       return l[i].Seq < l[j].Seq
  }
  
  func (l eventList) Swap(i, j int) {
@@ -725,8 +745,8 @@ var EventDescriptions = [EvCount]struct {
         Args  []string
  }{
         EvNone:           {"None", false, []string{}},
-       EvBatch:          {"Batch", false, []string{"p", "ticks"}},
-       EvFrequency:      {"Frequency", false, []string{"freq"}},
+       EvBatch:          {"Batch", false, []string{"p", "seq", "ticks"}},
+       EvFrequency:      {"Frequency", false, []string{"freq", "unused"}},
         EvStack:          {"Stack", false, []string{"id", "siz"}},
         EvGomaxprocs:     {"Gomaxprocs", true, []string{"procs"}},
         EvProcStart:      {"ProcStart", false, []string{"thread"}},
@@ -753,12 +773,12 @@ var EventDescriptions = [EvCount]struct {
         EvGoBlockCond:    {"GoBlockCond", true, []string{}},
         EvGoBlockNet:     {"GoBlockNet", true, []string{}},
         EvGoSysCall:      {"GoSysCall", true, []string{}},
-       EvGoSysExit:      {"GoSysExit", false, []string{"g", "ts"}},
+       EvGoSysExit:      {"GoSysExit", false, []string{"g", "seq", "ts"}},
         EvGoSysBlock:     {"GoSysBlock", false, []string{}},
         EvGoWaiting:      {"GoWaiting", false, []string{"g"}},
         EvGoInSyscall:    {"GoInSyscall", false, []string{"g"}},
         EvHeapAlloc:      {"HeapAlloc", false, []string{"mem"}},
         EvNextGC:         {"NextGC", false, []string{"mem"}},
-       EvTimerGoroutine: {"TimerGoroutine", false, []string{"g"}},
+       EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
         EvFutileWakeup:   {"FutileWakeup", false, []string{}},
  }
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go

index 7d066e86fe1cd2e18512fff21a56d8ad36104169..0eeb3e600e44848a8ffdf4c69c46d62ad1246508 100644 (file)
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -16,6 +16,10 @@ func TestCorruptedInputs(t *testing.T) {
                 "gotrace\x00Q00\x020",
                 "gotrace\x00T00\x020",
                 "gotrace\x00\xc3\x0200",
+               "go 1.5 trace\x00\x00\x00\x00\x020",
+               "go 1.5 trace\x00\x00\x00\x00Q00\x020",
+               "go 1.5 trace\x00\x00\x00\x00T00\x020",
+               "go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
         }
         for _, data := range tests {
                 events, err := Parse(strings.NewReader(data))
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go

index 788f4fd3b4c295df42ce7c908807a0ba2ce8377c..23beaf537c83f3d6946920001c2372913b8ae977 100644 (file)
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -1348,7 +1348,7 @@ func execute(gp *g, inheritTime bool) {
                 // GoSysExit has to happen when we have a P, but before GoStart.
                 // So we emit it here.
                 if gp.syscallsp != 0 && gp.sysblocktraced {
-                       traceGoSysExit(gp.sysexitticks)
+                       traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
                 }
                 traceGoStart()
         }
@@ -1986,6 +1986,7 @@ func exitsyscall(dummy int32) {
         }
  
         _g_.sysexitticks = 0
+       _g_.sysexitseq = 0
         if trace.enabled {
                 // Wait till traceGoSysBlock event is emitted.
                 // This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -1996,7 +1997,7 @@ func exitsyscall(dummy int32) {
                 // Tracing code can invoke write barriers that cannot run without a P.
                 // So instead we remember the syscall exit time and emit the event
                 // in execute when we have a P.
-               _g_.sysexitticks = cputicks()
+               _g_.sysexitseq, _g_.sysexitticks = tracestamp()
         }
  
         _g_.m.locks--
@@ -2044,7 +2045,7 @@ func exitsyscallfast() bool {
                                         // Denote blocking of the new syscall.
                                         traceGoSysBlock(_g_.m.p.ptr())
                                         // Denote completion of the current syscall.
-                                       traceGoSysExit(0)
+                                       traceGoSysExit(tracestamp())
                                 })
                         }
                         _g_.m.p.ptr().syscalltick++
@@ -2068,7 +2069,7 @@ func exitsyscallfast() bool {
                                                 osyield()
                                         }
                                 }
-                               traceGoSysExit(0)
+                               traceGoSysExit(tracestamp())
                         }
                 })
                 if ok {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index dc600ae5782375cfe4efb8b417f4a85aba4dfcf1..a1c790fa852a588a4037ce935f811c390903731d 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -235,16 +235,17 @@ type g struct {
         waitsince      int64  // approx time when the g become blocked
         waitreason     string // if status==Gwaiting
         schedlink      guintptr
-       preempt        bool  // preemption signal, duplicates stackguard0 = stackpreempt
-       paniconfault   bool  // panic (instead of crash) on unexpected fault address
-       preemptscan    bool  // preempted g does scan for gc
-       gcscandone     bool  // g has scanned stack; protected by _Gscan bit in status
-       gcscanvalid    bool  // false at start of gc cycle, true if G has not run since last scan
-       throwsplit     bool  // must not split stack
-       raceignore     int8  // ignore race detection events
-       sysblocktraced bool  // StartTrace has emitted EvGoInSyscall about this goroutine
-       sysexitticks   int64 // cputicks when syscall has returned (for tracing)
-       m              *m    // for debuggers, but offset not hard-coded
+       preempt        bool   // preemption signal, duplicates stackguard0 = stackpreempt
+       paniconfault   bool   // panic (instead of crash) on unexpected fault address
+       preemptscan    bool   // preempted g does scan for gc
+       gcscandone     bool   // g has scanned stack; protected by _Gscan bit in status
+       gcscanvalid    bool   // false at start of gc cycle, true if G has not run since last scan
+       throwsplit     bool   // must not split stack
+       raceignore     int8   // ignore race detection events
+       sysblocktraced bool   // StartTrace has emitted EvGoInSyscall about this goroutine
+       sysexitticks   int64  // cputicks when syscall has returned (for tracing)
+       sysexitseq     uint64 // trace seq when syscall has returned (for tracing)
+       m              *m     // for debuggers, but offset not hard-coded
         lockedm        *m
         sig            uint32
         writebuf       []byte
diff --git a/src/runtime/trace.go b/src/runtime/trace.go

index 8cd95893a0ac83ce93a241f69e79ad9a698b7e39..c81846221a2531e1d5b2990bf72414159fe3589e 100644 (file)
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -95,6 +95,7 @@ var trace struct {
         headerWritten bool      // whether ReadTrace has emitted trace header
         footerWritten bool      // whether ReadTrace has emitted trace footer
         shutdownSema  uint32    // used to wait for ReadTrace completion
+       seqStart      uint64    // sequence number when tracing was started
         ticksStart    int64     // cputicks when tracing was started
         ticksEnd      int64     // cputicks when tracing was stopped
         timeStart     int64     // nanotime when tracing was started
@@ -110,9 +111,31 @@ var trace struct {
         buf     *traceBuf // global trace buffer, used when running without a p
  }
  
+var traceseq uint64 // global trace sequence number
+
+// tracestamp returns a consistent sequence number, time stamp pair
+// for use in a trace. We need to make sure that time stamp ordering
+// (assuming synchronized CPUs) and sequence ordering match.
+// To do that, we increment traceseq, grab ticks, and increment traceseq again.
+// We treat odd traceseq as a sign that another thread is in the middle
+// of the sequence and spin until it is done.
+// Not splitting stack to avoid preemption, just in case the call sites
+// that used to call xadd64 and cputicks are sensitive to that.
+//go:nosplit
+func tracestamp() (seq uint64, ts int64) {
+       seq = atomicload64(&traceseq)
+       for seq&1 != 0 || !cas64(&traceseq, seq, seq+1) {
+               seq = atomicload64(&traceseq)
+       }
+       ts = cputicks()
+       atomicstore64(&traceseq, seq+2)
+       return seq >> 1, ts
+}
+
  // traceBufHeader is per-P tracing buffer.
  type traceBufHeader struct {
         link      *traceBuf               // in trace.empty/full
+       lastSeq   uint64                  // sequence number of last event
         lastTicks uint64                  // when we wrote the last event
         buf       []byte                  // trace data, always points to traceBuf.arr
         stk       [traceStackSize]uintptr // scratch buffer for traceback
@@ -147,7 +170,7 @@ func StartTrace() error {
                 return errorString("tracing is already enabled")
         }
  
-       trace.ticksStart = cputicks()
+       trace.seqStart, trace.ticksStart = tracestamp()
         trace.timeStart = nanotime()
         trace.headerWritten = false
         trace.footerWritten = false
@@ -308,7 +331,7 @@ func ReadTrace() []byte {
                 trace.headerWritten = true
                 trace.lockOwner = nil
                 unlock(&trace.lock)
-               return []byte("gotrace\x00")
+               return []byte("go 1.5 trace\x00\x00\x00\x00")
         }
         // Wait for new data.
         if trace.fullHead == nil && !trace.shutdown {
@@ -334,9 +357,11 @@ func ReadTrace() []byte {
                 var data []byte
                 data = append(data, traceEvFrequency|0<<traceArgCountShift)
                 data = traceAppend(data, uint64(freq))
+               data = traceAppend(data, 0)
                 if timers.gp != nil {
                         data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
                         data = traceAppend(data, uint64(timers.gp.goid))
+                       data = traceAppend(data, 0)
                 }
                 return data
         }
@@ -435,22 +460,27 @@ func traceEvent(ev byte, skip int, args ...uint64) {
                 return
         }
         buf := *bufp
-       const maxSize = 2 + 4*traceBytesPerNumber // event type, length, timestamp, stack id and two add params
+       const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params
         if buf == nil || cap(buf.buf)-len(buf.buf) < maxSize {
                 buf = traceFlush(buf)
                 *bufp = buf
         }
  
-       ticks := uint64(cputicks()) / traceTickDiv
+       seq, ticksraw := tracestamp()
+       seqDiff := seq - buf.lastSeq
+       ticks := uint64(ticksraw) / traceTickDiv
         tickDiff := ticks - buf.lastTicks
         if len(buf.buf) == 0 {
                 data := buf.buf
                 data = append(data, traceEvBatch|1<<traceArgCountShift)
                 data = traceAppend(data, uint64(pid))
+               data = traceAppend(data, seq)
                 data = traceAppend(data, ticks)
                 buf.buf = data
+               seqDiff = 0
                 tickDiff = 0
         }
+       buf.lastSeq = seq
         buf.lastTicks = ticks
         narg := byte(len(args))
         if skip >= 0 {
@@ -469,6 +499,7 @@ func traceEvent(ev byte, skip int, args ...uint64) {
                 data = append(data, 0)
                 lenp = &data[len(data)-1]
         }
+       data = traceAppend(data, seqDiff)
         data = traceAppend(data, tickDiff)
         for _, a := range args {
                 data = traceAppend(data, a)
@@ -800,12 +831,12 @@ func traceGoSysCall() {
         traceEvent(traceEvGoSysCall, 4)
  }
  
-func traceGoSysExit(ts int64) {
-       if ts != 0 && ts < trace.ticksStart {
+func traceGoSysExit(seq uint64, ts int64) {
+       if int64(seq)-int64(trace.seqStart) < 0 {
                 // The timestamp was obtained during a previous tracing session, ignore.
                 return
         }
-       traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), uint64(ts)/traceTickDiv)
+       traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
  }
  
  func traceGoSysBlock(pp *p) {
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go

index 1fca454ed017970ee417108814e7a9fc700f1182..58139ba14463aebef4ffe8b4f6599f25cb382da7 100644 (file)
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -125,7 +125,7 @@ func TestTraceSymbolize(t *testing.T) {
         <-pipeReadDone
  
         Stop()
-       events, _, err := parseTrace(buf)
+       events, _, err := parseTrace(t, buf)
         if err != nil {
                 t.Fatalf("failed to parse trace: %v", err)
         }
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go

index e987564d8bdfdd8595c3eaba5888f0722446953c..76d3d44ae70d1074d23659924e370d44855631c0 100644 (file)
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -75,13 +75,19 @@ func TestTrace(t *testing.T) {
         }
         Stop()
         _, err := trace.Parse(buf)
+       if err == trace.ErrTimeOrder {
+               t.Skipf("skipping trace: %v", err)
+       }
         if err != nil {
                 t.Fatalf("failed to parse trace: %v", err)
         }
  }
  
-func parseTrace(r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
         events, err := trace.Parse(r)
+       if err == trace.ErrTimeOrder {
+               t.Skipf("skipping trace: %v", err)
+       }
         if err != nil {
                 return nil, nil, err
         }
@@ -221,7 +227,7 @@ func TestTraceStress(t *testing.T) {
         runtime.GOMAXPROCS(procs)
  
         Stop()
-       _, _, err = parseTrace(buf)
+       _, _, err = parseTrace(t, buf)
         if err != nil {
                 t.Fatalf("failed to parse trace: %v", err)
         }
@@ -361,7 +367,7 @@ func TestTraceStressStartStop(t *testing.T) {
                 }
                 time.Sleep(time.Millisecond)
                 Stop()
-               if _, _, err := parseTrace(buf); err != nil {
+               if _, _, err := parseTrace(t, buf); err != nil {
                         t.Fatalf("failed to parse trace: %v", err)
                 }
         }
@@ -428,7 +434,7 @@ func TestTraceFutileWakeup(t *testing.T) {
         done.Wait()
  
         Stop()
-       events, _, err := parseTrace(buf)
+       events, _, err := parseTrace(t, buf)
         if err != nil {
                 t.Fatalf("failed to parse trace: %v", err)
         }
author	Russ Cox <rsc@golang.org>
	Thu, 23 Jul 2015 18:01:03 +0000 (14:01 -0400)
committer	Russ Cox <rsc@golang.org>
	Wed, 29 Jul 2015 22:32:14 +0000 (22:32 +0000)
src/internal/trace/parser.go		patch \| blob \| history
src/internal/trace/parser_test.go		patch \| blob \| history
src/runtime/proc1.go		patch \| blob \| history
src/runtime/runtime2.go		patch \| blob \| history
src/runtime/trace.go		patch \| blob \| history
src/runtime/trace/trace_stack_test.go		patch \| blob \| history
src/runtime/trace/trace_test.go		patch \| blob \| history