runtime: use per-goroutine sequence numbers in tracer

author Dmitry Vyukov <dvyukov@google.com>

Tue, 5 Apr 2016 13:29:14 +0000 (15:29 +0200)

committer Dmitry Vyukov <dvyukov@google.com>

Sat, 23 Apr 2016 15:57:05 +0000 (15:57 +0000)
author Dmitry Vyukov <dvyukov@google.com>
Tue, 5 Apr 2016 13:29:14 +0000 (15:29 +0200)
committer Dmitry Vyukov <dvyukov@google.com>
Sat, 23 Apr 2016 15:57:05 +0000 (15:57 +0000)
diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go

new file mode 100644 (file)

index 0000000..f9ec44c
--- /dev/null
+++ b/src/internal/trace/order.go
@@ -0,0 +1,278 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "fmt"
+       "sort"
+)
+
+type eventBatch struct {
+       events   []*Event
+       selected bool
+}
+
+type orderEvent struct {
+       ev    *Event
+       batch int
+       g     uint64
+       init  gState
+       next  gState
+}
+
+type gStatus int
+
+type gState struct {
+       seq    uint64
+       status gStatus
+}
+
+const (
+       gDead gStatus = iota
+       gRunnable
+       gRunning
+       gWaiting
+
+       unordered = ^uint64(0)
+       garbage   = ^uint64(0) - 1
+       noseq     = ^uint64(0)
+       seqinc    = ^uint64(0) - 1
+)
+
+// order1007 merges a set of per-P event batches into a single, consistent stream.
+// The high level idea is as follows. Events within an individual batch are in
+// correct order, because they are emitted by a single P. So we need to produce
+// a correct interleaving of the batches. To do this we take first unmerged event
+// from each batch (frontier). Then choose subset that is "ready" to be merged,
+// that is, events for which all dependencies are already merged. Then we choose
+// event with the lowest timestamp from the subset, merge it and repeat.
+// This approach ensures that we form a consistent stream even if timestamps are
+// incorrect (condition observed on some machines).
+func order1007(m map[int][]*Event) (events []*Event, err error) {
+       pending := 0
+       var batches []*eventBatch
+       for _, v := range m {
+               pending += len(v)
+               batches = append(batches, &eventBatch{v, false})
+       }
+       gs := make(map[uint64]gState)
+       var frontier []orderEvent
+       for ; pending != 0; pending-- {
+               for i, b := range batches {
+                       if b.selected || len(b.events) == 0 {
+                               continue
+                       }
+                       ev := b.events[0]
+                       g, init, next := stateTransition(ev)
+                       if !transitionReady(g, gs[g], init) {
+                               continue
+                       }
+                       frontier = append(frontier, orderEvent{ev, i, g, init, next})
+                       b.events = b.events[1:]
+                       b.selected = true
+                       // Get rid of "Local" events, they are intended merely for ordering.
+                       switch ev.Type {
+                       case EvGoStartLocal:
+                               ev.Type = EvGoStart
+                       case EvGoUnblockLocal:
+                               ev.Type = EvGoUnblock
+                       case EvGoSysExitLocal:
+                               ev.Type = EvGoSysExit
+                       }
+               }
+               if len(frontier) == 0 {
+                       return nil, fmt.Errorf("no consistent ordering of events possible")
+               }
+               sort.Sort(orderEventList(frontier))
+               f := frontier[0]
+               frontier[0] = frontier[len(frontier)-1]
+               frontier = frontier[:len(frontier)-1]
+               events = append(events, f.ev)
+               transition(gs, f.g, f.init, f.next)
+               if !batches[f.batch].selected {
+                       panic("frontier batch is not selected")
+               }
+               batches[f.batch].selected = false
+       }
+
+       // At this point we have a consistent stream of events.
+       // Make sure time stamps respect the ordering.
+       // The tests will skip (not fail) the test case if they see this error.
+       if !sort.IsSorted(eventList(events)) {
+               return nil, ErrTimeOrder
+       }
+
+       // The last part is giving correct timestamps to EvGoSysExit events.
+       // The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2])
+       // is potentially acquired long before event emission. So far we've used
+       // timestamp of event emission (ev.Ts).
+       // We could not set ev.Ts = ev.Args[2] earlier, because it would produce
+       // seemingly broken timestamps (misplaced event).
+       // We also can't simply update the timestamp and resort events, because
+       // if timestamps are broken we will misplace the event and later report
+       // logically broken trace (instead of reporting broken timestamps).
+       lastSysBlock := make(map[uint64]int64)
+       for _, ev := range events {
+               switch ev.Type {
+               case EvGoSysBlock, EvGoInSyscall:
+                       lastSysBlock[ev.G] = ev.Ts
+               case EvGoSysExit:
+                       ts := int64(ev.Args[2])
+                       if ts == 0 {
+                               continue
+                       }
+                       block := lastSysBlock[ev.G]
+                       if block == 0 {
+                               return nil, fmt.Errorf("stray syscall exit")
+                       }
+                       if ts < block {
+                               return nil, ErrTimeOrder
+                       }
+                       ev.Ts = ts
+               }
+       }
+       sort.Sort(eventList(events))
+
+       return
+}
+
+// stateTransition returns goroutine state (sequence and status) when the event
+// becomes ready for merging (init) and the goroutine state after the event (next).
+func stateTransition(ev *Event) (g uint64, init, next gState) {
+       switch ev.Type {
+       case EvGoCreate:
+               g = ev.Args[0]
+               init = gState{0, gDead}
+               next = gState{1, gRunnable}
+       case EvGoWaiting, EvGoInSyscall:
+               g = ev.G
+               init = gState{1, gRunnable}
+               next = gState{2, gWaiting}
+       case EvGoStart:
+               g = ev.G
+               init = gState{ev.Args[1], gRunnable}
+               next = gState{ev.Args[1] + 1, gRunning}
+       case EvGoStartLocal:
+               // noseq means that this event is ready for merging as soon as
+               // frontier reaches it (EvGoStartLocal is emitted on the same P
+               // as the corresponding EvGoCreate/EvGoUnblock, and thus the latter
+               // is already merged).
+               // seqinc is a stub for cases when event increments g sequence,
+               // but since we don't know current seq we also don't know next seq.
+               g = ev.G
+               init = gState{noseq, gRunnable}
+               next = gState{seqinc, gRunning}
+       case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+               EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep, EvGoSysBlock:
+               g = ev.G
+               init = gState{noseq, gRunning}
+               next = gState{noseq, gWaiting}
+       case EvGoSched, EvGoPreempt:
+               g = ev.G
+               init = gState{noseq, gRunning}
+               next = gState{noseq, gRunnable}
+       case EvGoUnblock, EvGoSysExit:
+               g = ev.Args[0]
+               init = gState{ev.Args[1], gWaiting}
+               next = gState{ev.Args[1] + 1, gRunnable}
+       case EvGoUnblockLocal, EvGoSysExitLocal:
+               g = ev.Args[0]
+               init = gState{noseq, gWaiting}
+               next = gState{seqinc, gRunnable}
+       case EvGCStart:
+               g = garbage
+               init = gState{ev.Args[0], gDead}
+               next = gState{ev.Args[0] + 1, gDead}
+       default:
+               // no ordering requirements
+               g = unordered
+       }
+       return
+}
+
+func transitionReady(g uint64, curr, init gState) bool {
+       return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status
+}
+
+func transition(gs map[uint64]gState, g uint64, init, next gState) {
+       if g == unordered {
+               return
+       }
+       curr := gs[g]
+       if !transitionReady(g, curr, init) {
+               panic("event sequences are broken")
+       }
+       switch next.seq {
+       case noseq:
+               next.seq = curr.seq
+       case seqinc:
+               next.seq = curr.seq + 1
+       }
+       gs[g] = next
+}
+
+// order1005 merges a set of per-P event batches into a single, consistent stream.
+func order1005(m map[int][]*Event) (events []*Event, err error) {
+       for _, batch := range m {
+               events = append(events, batch...)
+       }
+       for _, ev := range events {
+               if ev.Type == EvGoSysExit {
+                       // EvGoSysExit emission is delayed until the thread has a P.
+                       // Give it the real sequence number and time stamp.
+                       ev.seq = int64(ev.Args[1])
+                       if ev.Args[2] != 0 {
+                               ev.Ts = int64(ev.Args[2])
+                       }
+               }
+       }
+       sort.Sort(eventSeqList(events))
+       if !sort.IsSorted(eventList(events)) {
+               return nil, ErrTimeOrder
+       }
+       return
+}
+
+type orderEventList []orderEvent
+
+func (l orderEventList) Len() int {
+       return len(l)
+}
+
+func (l orderEventList) Less(i, j int) bool {
+       return l[i].ev.Ts < l[j].ev.Ts
+}
+
+func (l orderEventList) Swap(i, j int) {
+       l[i], l[j] = l[j], l[i]
+}
+
+type eventList []*Event
+
+func (l eventList) Len() int {
+       return len(l)
+}
+
+func (l eventList) Less(i, j int) bool {
+       return l[i].Ts < l[j].Ts
+}
+
+func (l eventList) Swap(i, j int) {
+       l[i], l[j] = l[j], l[i]
+}
+
+type eventSeqList []*Event
+
+func (l eventSeqList) Len() int {
+       return len(l)
+}
+
+func (l eventSeqList) Less(i, j int) bool {
+       return l[i].seq < l[j].seq
+}
+
+func (l eventSeqList) Swap(i, j int) {
+       l[i], l[j] = l[j], l[i]
+}
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go

index d279ddeacf711e8b578f792473ec7cf242762a66..e6f29445c15e6611c19635ec5aa5a6278f218118 100644 (file)
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -11,7 +11,6 @@ import (
         "io"
         "os"
         "os/exec"
-       "sort"
         "strconv"
         "strings"
  )
@@ -135,7 +134,12 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
                 }
                 off += n
                 typ := buf[0] << 2 >> 2
-               narg := buf[0] >> 6
+               narg := buf[0]>>6 + 1
+               inlineArgs := byte(4)
+               if ver < 1007 {
+                       narg++
+                       inlineArgs++
+               }
                 if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
                         err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
                         return
@@ -180,8 +184,8 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
                         continue
                 }
                 ev := rawEvent{typ: typ, off: off0}
-               if narg < 3 {
-                       for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
+               if narg < inlineArgs {
+                       for i := 0; i < int(narg); i++ {
                                 var v uint64
                                 v, off, err = readVal(r, off)
                                 if err != nil {
@@ -191,7 +195,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
                                 ev.args = append(ev.args, v)
                         }
                 } else {
-                       // If narg == 3, the first value is length of the event in bytes.
+                       // More than inlineArgs args, the first value is length of the event in bytes.
                         var v uint64
                         v, off, err = readVal(r, off)
                         if err != nil {
@@ -250,34 +254,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
         var lastP int
         lastGs := make(map[int]uint64) // last goroutine running on P
         stacks = make(map[uint64][]*Frame)
+       batches := make(map[int][]*Event) // events by P
         for _, raw := range rawEvents {
                 desc := EventDescriptions[raw.typ]
                 if desc.Name == "" {
                         err = fmt.Errorf("missing description for event type %v", raw.typ)
                         return
                 }
-               if raw.typ != EvStack {
-                       narg := len(desc.Args)
-                       if desc.Stack {
-                               narg++
-                       }
-                       if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
-                               narg++ // sequence number
-                               narg++ // timestamp
-                       }
-                       if len(raw.args) != narg {
-                               err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
-                                       desc.Name, raw.off, narg, len(raw.args))
-                               return
-                       }
+               narg := argNum(raw, ver)
+               if len(raw.args) != narg {
+                       err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
+                               desc.Name, raw.off, narg, len(raw.args))
+                       return
                 }
                 switch raw.typ {
                 case EvBatch:
                         lastGs[lastP] = lastG
                         lastP = int(raw.args[0])
                         lastG = lastGs[lastP]
-                       lastSeq = int64(raw.args[1])
-                       lastTs = int64(raw.args[2])
+                       if ver < 1007 {
+                               lastSeq = int64(raw.args[1])
+                               lastTs = int64(raw.args[2])
+                       } else {
+                               lastTs = int64(raw.args[1])
+                       }
                 case EvFrequency:
                         ticksPerSec = int64(raw.args[0])
                         if ticksPerSec <= 0 {
@@ -328,18 +328,26 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
                         }
                 default:
                         e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
-                       e.seq = lastSeq + int64(raw.args[0])
-                       e.Ts = lastTs + int64(raw.args[1])
-                       lastSeq = e.seq
-                       lastTs = e.Ts
-                       for i := range desc.Args {
-                               e.Args[i] = raw.args[i+2]
+                       var argOffset int
+                       if ver < 1007 {
+                               e.seq = lastSeq + int64(raw.args[0])
+                               e.Ts = lastTs + int64(raw.args[1])
+                               lastSeq = e.seq
+                               argOffset = 2
+                       } else {
+                               e.Ts = lastTs + int64(raw.args[0])
+                               argOffset = 1
                         }
-                       if desc.Stack {
-                               e.StkID = raw.args[len(desc.Args)+2]
+                       lastTs = e.Ts
+                       for i := argOffset; i < narg; i++ {
+                               if i == narg-1 && desc.Stack {
+                                       e.StkID = raw.args[i]
+                               } else {
+                                       e.Args[i-argOffset] = raw.args[i]
+                               }
                         }
                         switch raw.typ {
-                       case EvGoStart:
+                       case EvGoStart, EvGoStartLocal:
                                 lastG = e.Args[0]
                                 e.G = lastG
                         case EvGCStart, EvGCDone, EvGCScanStart, EvGCScanDone:
@@ -349,28 +357,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
                                 EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
                                 EvGoSysBlock:
                                 lastG = 0
-                       case EvGoSysExit:
-                               // EvGoSysExit emission is delayed until the thread has a P.
-                               // Give it the real sequence number and time stamp.
-                               e.seq = int64(e.Args[1])
-                               if e.Args[2] != 0 {
-                                       e.Ts = int64(e.Args[2])
-                               }
+                       case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+                               e.G = e.Args[0]
                         }
-                       events = append(events, e)
+                       batches[lastP] = append(batches[lastP], e)
                 }
         }
-       if len(events) == 0 {
+       if len(batches) == 0 {
                 err = fmt.Errorf("trace is empty")
                 return
         }
-
-       // Sort by sequence number and translate cpu ticks to real time.
-       sort.Sort(eventList(events))
         if ticksPerSec == 0 {
                 err = fmt.Errorf("no EvFrequency event")
                 return
         }
+       if ver < 1007 {
+               events, err = order1005(batches)
+       } else {
+               events, err = order1007(batches)
+       }
+       if err != nil {
+               return
+       }
+
+       // Translate cpu ticks to real time.
         minTs := events[0].Ts
         // Use floating point to avoid integer overflows.
         freq := 1e9 / float64(ticksPerSec)
@@ -382,7 +392,6 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
                 }
                 if ev.Type == EvGoSysExit {
                         ev.P = SyscallP
-                       ev.G = ev.Args[0]
                 }
         }
  
@@ -543,19 +552,15 @@ func postProcessTrace(ver int, events []*Event) error {
                         p.evSweep.Link = ev
                         p.evSweep = nil
                 case EvGoWaiting:
-                       g1 := gs[ev.Args[0]]
-                       if g1.state != gRunnable {
-                               return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+                       if g.state != gRunnable {
+                               return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
                         }
-                       g1.state = gWaiting
-                       gs[ev.Args[0]] = g1
+                       g.state = gWaiting
                 case EvGoInSyscall:
-                       g1 := gs[ev.Args[0]]
-                       if g1.state != gRunnable {
-                               return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+                       if g.state != gRunnable {
+                               return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
                         }
-                       g1.state = gWaiting
-                       gs[ev.Args[0]] = g1
+                       g.state = gWaiting
                 case EvGoCreate:
                         if err := checkRunning(p, g, ev, true); err != nil {
                                 return err
@@ -666,18 +671,6 @@ func postProcessTrace(ver int, events []*Event) error {
         // TODO(dvyukov): restore stacks for EvGoStart events.
         // TODO(dvyukov): test that all EvGoStart events has non-nil Link.
  
-       // Last, after all the other consistency checks,
-       // make sure time stamps respect sequence numbers.
-       // The tests will skip (not fail) the test case if they see this error,
-       // so check everything else that could possibly be wrong first.
-       lastTs := int64(0)
-       for _, ev := range events {
-               if ev.Ts < lastTs {
-                       return ErrTimeOrder
-               }
-               lastTs = ev.Ts
-       }
-
         return nil
  }
  
@@ -773,30 +766,51 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
         return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0)
  }
  
-type eventList []*Event
-
-func (l eventList) Len() int {
-       return len(l)
-}
-
-func (l eventList) Less(i, j int) bool {
-       return l[i].seq < l[j].seq
+// Print dumps events to stdout. For debugging.
+func Print(events []*Event) {
+       for _, ev := range events {
+               PrintEvent(ev)
+       }
  }
  
-func (l eventList) Swap(i, j int) {
-       l[i], l[j] = l[j], l[i]
+// PrintEvent dumps the event to stdout. For debugging.
+func PrintEvent(ev *Event) {
+       desc := EventDescriptions[ev.Type]
+       fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
+       for i, a := range desc.Args {
+               fmt.Printf(" %v=%v", a, ev.Args[i])
+       }
+       fmt.Printf("\n")
  }
  
-// Print dumps events to stdout. For debugging.
-func Print(events []*Event) {
-       for _, ev := range events {
-               desc := EventDescriptions[ev.Type]
-               fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
-               for i, a := range desc.Args {
-                       fmt.Printf(" %v=%v", a, ev.Args[i])
+// argNum returns total number of args for the event accounting for timestamps,
+// sequence numbers and differences between trace format versions.
+func argNum(raw rawEvent, ver int) int {
+       desc := EventDescriptions[raw.typ]
+       if raw.typ == EvStack {
+               return len(raw.args)
+       }
+       narg := len(desc.Args)
+       if desc.Stack {
+               narg++
+       }
+       switch raw.typ {
+       case EvBatch, EvFrequency, EvTimerGoroutine:
+               if ver < 1007 {
+                       narg++ // there was an unused arg before 1.7
+               }
+       case EvGCStart, EvGoStart, EvGoUnblock:
+               if ver < 1007 {
+                       narg-- // 1.7 added an additional seq arg
+               }
+               fallthrough
+       default:
+               narg++ // timestamp
+               if ver < 1007 {
+                       narg++ // sequence
                 }
-               fmt.Printf("\n")
         }
+       return narg
  }
  
  // Event types in the trace.
@@ -809,21 +823,21 @@ const (
         EvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
         EvProcStart      = 5  // start of P [timestamp, thread id]
         EvProcStop       = 6  // stop of P [timestamp]
-       EvGCStart        = 7  // GC start [timestamp, stack id]
+       EvGCStart        = 7  // GC start [timestamp, seq, stack id]
         EvGCDone         = 8  // GC done [timestamp]
         EvGCScanStart    = 9  // GC scan start [timestamp]
         EvGCScanDone     = 10 // GC scan done [timestamp]
         EvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
         EvGCSweepDone    = 12 // GC sweep done [timestamp]
         EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
-       EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
+       EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id, seq]
         EvGoEnd          = 15 // goroutine ends [timestamp]
         EvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
         EvGoSched        = 17 // goroutine calls Gosched [timestamp, stack]
         EvGoPreempt      = 18 // goroutine is preempted [timestamp, stack]
         EvGoSleep        = 19 // goroutine calls Sleep [timestamp, stack]
         EvGoBlock        = 20 // goroutine blocks [timestamp, stack]
-       EvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
+       EvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
         EvGoBlockSend    = 22 // goroutine blocks on chan send [timestamp, stack]
         EvGoBlockRecv    = 23 // goroutine blocks on chan recv [timestamp, stack]
         EvGoBlockSelect  = 24 // goroutine blocks on select [timestamp, stack]
@@ -831,7 +845,7 @@ const (
         EvGoBlockCond    = 26 // goroutine blocks on Cond [timestamp, stack]
         EvGoBlockNet     = 27 // goroutine blocks on network [timestamp, stack]
         EvGoSysCall      = 28 // syscall enter [timestamp, stack]
-       EvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
+       EvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
         EvGoSysBlock     = 30 // syscall blocks [timestamp]
         EvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
         EvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
@@ -840,7 +854,10 @@ const (
         EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
         EvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
         EvString         = 37 // string dictionary entry [ID, length, string]
-       EvCount          = 38
+       EvGoStartLocal   = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+       EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+       EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+       EvCount          = 41
  )
  
  var EventDescriptions = [EvCount]struct {
@@ -850,27 +867,27 @@ var EventDescriptions = [EvCount]struct {
         Args       []string
  }{
         EvNone:           {"None", 1005, false, []string{}},
-       EvBatch:          {"Batch", 1005, false, []string{"p", "seq", "ticks"}},
-       EvFrequency:      {"Frequency", 1005, false, []string{"freq", "unused"}},
+       EvBatch:          {"Batch", 1005, false, []string{"p", "ticks"}}, // in 1.5 format it was {"p", "seq", "ticks"}
+       EvFrequency:      {"Frequency", 1005, false, []string{"freq"}},   // in 1.5 format it was {"freq", "unused"}
         EvStack:          {"Stack", 1005, false, []string{"id", "siz"}},
         EvGomaxprocs:     {"Gomaxprocs", 1005, true, []string{"procs"}},
         EvProcStart:      {"ProcStart", 1005, false, []string{"thread"}},
         EvProcStop:       {"ProcStop", 1005, false, []string{}},
-       EvGCStart:        {"GCStart", 1005, true, []string{}},
+       EvGCStart:        {"GCStart", 1005, true, []string{"seq"}}, // in 1.5 format it was {}
         EvGCDone:         {"GCDone", 1005, false, []string{}},
         EvGCScanStart:    {"GCScanStart", 1005, false, []string{}},
         EvGCScanDone:     {"GCScanDone", 1005, false, []string{}},
         EvGCSweepStart:   {"GCSweepStart", 1005, true, []string{}},
         EvGCSweepDone:    {"GCSweepDone", 1005, false, []string{}},
         EvGoCreate:       {"GoCreate", 1005, true, []string{"g", "stack"}},
-       EvGoStart:        {"GoStart", 1005, false, []string{"g"}},
+       EvGoStart:        {"GoStart", 1005, false, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
         EvGoEnd:          {"GoEnd", 1005, false, []string{}},
         EvGoStop:         {"GoStop", 1005, true, []string{}},
         EvGoSched:        {"GoSched", 1005, true, []string{}},
         EvGoPreempt:      {"GoPreempt", 1005, true, []string{}},
         EvGoSleep:        {"GoSleep", 1005, true, []string{}},
         EvGoBlock:        {"GoBlock", 1005, true, []string{}},
-       EvGoUnblock:      {"GoUnblock", 1005, true, []string{"g"}},
+       EvGoUnblock:      {"GoUnblock", 1005, true, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
         EvGoBlockSend:    {"GoBlockSend", 1005, true, []string{}},
         EvGoBlockRecv:    {"GoBlockRecv", 1005, true, []string{}},
         EvGoBlockSelect:  {"GoBlockSelect", 1005, true, []string{}},
@@ -884,7 +901,10 @@ var EventDescriptions = [EvCount]struct {
         EvGoInSyscall:    {"GoInSyscall", 1005, false, []string{"g"}},
         EvHeapAlloc:      {"HeapAlloc", 1005, false, []string{"mem"}},
         EvNextGC:         {"NextGC", 1005, false, []string{"mem"}},
-       EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g", "unused"}},
+       EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}}, // in 1.5 format it was {"g", "unused"}
         EvFutileWakeup:   {"FutileWakeup", 1005, false, []string{}},
         EvString:         {"String", 1007, false, []string{}},
+       EvGoStartLocal:   {"GoStartLocal", 1007, false, []string{"g"}},
+       EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}},
+       EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}},
  }
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go

index 337d5a85d74e1a33c23f33a5f03563f133ab025e..340f1064843a9bc8df54dd6bbcbbb54f0bd428aa 100644 (file)
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -89,10 +89,10 @@ func TestParseVersion(t *testing.T) {
  func TestTimestampOverflow(t *testing.T) {
         // Test that parser correctly handles large timestamps (long tracing).
         w := newWriter()
-       w.emit(EvBatch, 0, 0, 0)
-       w.emit(EvFrequency, 1e9, 0)
+       w.emit(EvBatch, 0, 0)
+       w.emit(EvFrequency, 1e9)
         for ts := uint64(1); ts < 1e16; ts *= 2 {
-               w.emit(EvGoCreate, 1, ts, ts, 1, 0)
+               w.emit(EvGoCreate, ts, ts, 0, 0)
         }
         if _, err := Parse(w, ""); err != nil {
                 t.Fatalf("failed to parse: %v", err)
@@ -110,7 +110,7 @@ func newWriter() *writer {
  }
  
  func (w *writer) emit(typ byte, args ...uint64) {
-       nargs := byte(len(args)) - 2
+       nargs := byte(len(args)) - 1
         if nargs > 3 {
                 nargs = 3
         }
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index 98a986cd63758671763a44cc3c497ac351a5a9fc..a847823da4b476b6b596b228113c48e1e007dbb5 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1796,23 +1796,7 @@ func execute(gp *g, inheritTime bool) {
                 // GoSysExit has to happen when we have a P, but before GoStart.
                 // So we emit it here.
                 if gp.syscallsp != 0 && gp.sysblocktraced {
-                       // Since gp.sysblocktraced is true, we must emit an event.
-                       // There is a race between the code that initializes sysexitseq
-                       // and sysexitticks (in exitsyscall, which runs without a P,
-                       // and therefore is not stopped with the rest of the world)
-                       // and the code that initializes a new trace.
-                       // The recorded sysexitseq and sysexitticks must therefore
-                       // be treated as "best effort". If they are valid for this trace,
-                       // then great, use them for greater accuracy.
-                       // But if they're not valid for this trace, assume that the
-                       // trace was started after the actual syscall exit (but before
-                       // we actually managed to start the goroutine, aka right now),
-                       // and assign a fresh time stamp to keep the log consistent.
-                       seq, ts := gp.sysexitseq, gp.sysexitticks
-                       if seq == 0 || int64(seq)-int64(trace.seqStart) < 0 {
-                               seq, ts = tracestamp()
-                       }
-                       traceGoSysExit(seq, ts)
+                       traceGoSysExit(gp.sysexitticks)
                 }
                 traceGoStart()
         }
@@ -2481,7 +2465,6 @@ func exitsyscall(dummy int32) {
         }
  
         _g_.sysexitticks = 0
-       _g_.sysexitseq = 0
         if trace.enabled {
                 // Wait till traceGoSysBlock event is emitted.
                 // This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -2492,7 +2475,7 @@ func exitsyscall(dummy int32) {
                 // Tracing code can invoke write barriers that cannot run without a P.
                 // So instead we remember the syscall exit time and emit the event
                 // in execute when we have a P.
-               _g_.sysexitseq, _g_.sysexitticks = tracestamp()
+               _g_.sysexitticks = cputicks()
         }
  
         _g_.m.locks--
@@ -2540,7 +2523,7 @@ func exitsyscallfast() bool {
                                         // Denote blocking of the new syscall.
                                         traceGoSysBlock(_g_.m.p.ptr())
                                         // Denote completion of the current syscall.
-                                       traceGoSysExit(tracestamp())
+                                       traceGoSysExit(0)
                                 })
                         }
                         _g_.m.p.ptr().syscalltick++
@@ -2564,7 +2547,7 @@ func exitsyscallfast() bool {
                                                 osyield()
                                         }
                                 }
-                               traceGoSysExit(tracestamp())
+                               traceGoSysExit(0)
                         }
                 })
                 if ok {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index 0fdea400de4f787523f23d7b700604c682d9e2e3..8cfe6b06e613a1b4bf25c2a6cc4ea59c99c6fdfa 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -332,16 +332,17 @@ type g struct {
         waitsince      int64  // approx time when the g become blocked
         waitreason     string // if status==Gwaiting
         schedlink      guintptr
-       preempt        bool   // preemption signal, duplicates stackguard0 = stackpreempt
-       paniconfault   bool   // panic (instead of crash) on unexpected fault address
-       preemptscan    bool   // preempted g does scan for gc
-       gcscandone     bool   // g has scanned stack; protected by _Gscan bit in status
-       gcscanvalid    bool   // false at start of gc cycle, true if G has not run since last scan
-       throwsplit     bool   // must not split stack
-       raceignore     int8   // ignore race detection events
-       sysblocktraced bool   // StartTrace has emitted EvGoInSyscall about this goroutine
-       sysexitticks   int64  // cputicks when syscall has returned (for tracing)
-       sysexitseq     uint64 // trace seq when syscall has returned (for tracing)
+       preempt        bool     // preemption signal, duplicates stackguard0 = stackpreempt
+       paniconfault   bool     // panic (instead of crash) on unexpected fault address
+       preemptscan    bool     // preempted g does scan for gc
+       gcscandone     bool     // g has scanned stack; protected by _Gscan bit in status
+       gcscanvalid    bool     // false at start of gc cycle, true if G has not run since last scan
+       throwsplit     bool     // must not split stack
+       raceignore     int8     // ignore race detection events
+       sysblocktraced bool     // StartTrace has emitted EvGoInSyscall about this goroutine
+       sysexitticks   int64    // cputicks when syscall has returned (for tracing)
+       traceseq       uint64   // trace event sequencer
+       tracelastp     puintptr // last P emitted an event for this goroutine
         lockedm        *m
         sig            uint32
         writebuf       []byte
diff --git a/src/runtime/trace.go b/src/runtime/trace.go

index 06fbdfac940ad20dd88a2991e21815cd53fe57fe..092f941f0c398219a2c45cf453d5e89315d403cc 100644 (file)
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -13,7 +13,6 @@
  package runtime
  
  import (
-       "runtime/internal/atomic"
         "runtime/internal/sys"
         "unsafe"
  )
@@ -27,21 +26,21 @@ const (
         traceEvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
         traceEvProcStart      = 5  // start of P [timestamp, thread id]
         traceEvProcStop       = 6  // stop of P [timestamp]
-       traceEvGCStart        = 7  // GC start [timestamp, stack id]
+       traceEvGCStart        = 7  // GC start [timestamp, seq, stack id]
         traceEvGCDone         = 8  // GC done [timestamp]
         traceEvGCScanStart    = 9  // GC scan start [timestamp]
         traceEvGCScanDone     = 10 // GC scan done [timestamp]
         traceEvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
         traceEvGCSweepDone    = 12 // GC sweep done [timestamp]
         traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
-       traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
+       traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id, seq]
         traceEvGoEnd          = 15 // goroutine ends [timestamp]
         traceEvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
         traceEvGoSched        = 17 // goroutine calls Gosched [timestamp, stack]
         traceEvGoPreempt      = 18 // goroutine is preempted [timestamp, stack]
         traceEvGoSleep        = 19 // goroutine calls Sleep [timestamp, stack]
         traceEvGoBlock        = 20 // goroutine blocks [timestamp, stack]
-       traceEvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
+       traceEvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
         traceEvGoBlockSend    = 22 // goroutine blocks on chan send [timestamp, stack]
         traceEvGoBlockRecv    = 23 // goroutine blocks on chan recv [timestamp, stack]
         traceEvGoBlockSelect  = 24 // goroutine blocks on select [timestamp, stack]
@@ -49,7 +48,7 @@ const (
         traceEvGoBlockCond    = 26 // goroutine blocks on Cond [timestamp, stack]
         traceEvGoBlockNet     = 27 // goroutine blocks on network [timestamp, stack]
         traceEvGoSysCall      = 28 // syscall enter [timestamp, stack]
-       traceEvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
+       traceEvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
         traceEvGoSysBlock     = 30 // syscall blocks [timestamp]
         traceEvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
         traceEvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
@@ -58,7 +57,10 @@ const (
         traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
         traceEvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
         traceEvString         = 37 // string dictionary entry [ID, length, string]
-       traceEvCount          = 38
+       traceEvGoStartLocal   = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+       traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+       traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+       traceEvCount          = 41
  )
  
  const (
@@ -105,6 +107,7 @@ var trace struct {
         ticksEnd      int64       // cputicks when tracing was stopped
         timeStart     int64       // nanotime when tracing was started
         timeEnd       int64       // nanotime when tracing was stopped
+       seqGC         uint64      // GC start/done sequencer
         reading       traceBufPtr // buffer currently handed off to user
         empty         traceBufPtr // stack of empty buffers
         fullHead      traceBufPtr // queue of full buffers
@@ -122,31 +125,9 @@ var trace struct {
         buf     traceBufPtr // global trace buffer, used when running without a p
  }
  
-var traceseq uint64 // global trace sequence number
-
-// tracestamp returns a consistent sequence number, time stamp pair
-// for use in a trace. We need to make sure that time stamp ordering
-// (assuming synchronized CPUs) and sequence ordering match.
-// To do that, we increment traceseq, grab ticks, and increment traceseq again.
-// We treat odd traceseq as a sign that another thread is in the middle
-// of the sequence and spin until it is done.
-// Not splitting stack to avoid preemption, just in case the call sites
-// that used to call xadd64 and cputicks are sensitive to that.
-//go:nosplit
-func tracestamp() (seq uint64, ts int64) {
-       seq = atomic.Load64(&traceseq)
-       for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) {
-               seq = atomic.Load64(&traceseq)
-       }
-       ts = cputicks()
-       atomic.Store64(&traceseq, seq+2)
-       return seq >> 1, ts
-}
-
  // traceBufHeader is per-P tracing buffer.
  type traceBufHeader struct {
         link      traceBufPtr             // in trace.empty/full
-       lastSeq   uint64                  // sequence number of last event
         lastTicks uint64                  // when we wrote the last event
         pos       int                     // next write offset in arr
         stk       [traceStackSize]uintptr // scratch buffer for traceback
@@ -194,13 +175,6 @@ func StartTrace() error {
                 return errorString("tracing is already enabled")
         }
  
-       trace.seqStart, trace.ticksStart = tracestamp()
-       trace.timeStart = nanotime()
-       trace.headerWritten = false
-       trace.footerWritten = false
-       trace.strings = make(map[string]uint64)
-       trace.stringSeq = 0
-
         // Can't set trace.enabled yet. While the world is stopped, exitsyscall could
         // already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
         // That would lead to an inconsistent trace:
@@ -213,12 +187,15 @@ func StartTrace() error {
         for _, gp := range allgs {
                 status := readgstatus(gp)
                 if status != _Gdead {
-                       traceGoCreate(gp, gp.startpc)
+                       traceGoCreate(gp, gp.startpc) // also resets gp.traceseq/tracelastp
                 }
                 if status == _Gwaiting {
+                       // traceEvGoWaiting is implied to have seq=1.
+                       gp.traceseq++
                         traceEvent(traceEvGoWaiting, -1, uint64(gp.goid))
                 }
                 if status == _Gsyscall {
+                       gp.traceseq++
                         traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid))
                 } else {
                         gp.sysblocktraced = false
@@ -226,6 +203,17 @@ func StartTrace() error {
         }
         traceProcStart()
         traceGoStart()
+       // Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
+       // If we do it the other way around, it is possible that exitsyscall will
+       // query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp.
+       // It will lead to a false conclusion that cputicks is broken.
+       trace.ticksStart = cputicks()
+       trace.timeStart = nanotime()
+       trace.headerWritten = false
+       trace.footerWritten = false
+       trace.strings = make(map[string]uint64)
+       trace.stringSeq = 0
+       trace.seqGC = 0
         _g_.m.startingtrace = false
         trace.enabled = true
  
@@ -382,11 +370,9 @@ func ReadTrace() []byte {
                 var data []byte
                 data = append(data, traceEvFrequency|0<<traceArgCountShift)
                 data = traceAppend(data, uint64(freq))
-               data = traceAppend(data, 0)
                 if timers.gp != nil {
                         data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
                         data = traceAppend(data, uint64(timers.gp.goid))
-                       data = traceAppend(data, 0)
                 }
                 // This will emit a bunch of full buffers, we will pick them up
                 // on the next iteration.
@@ -494,19 +480,14 @@ func traceEvent(ev byte, skip int, args ...uint64) {
                 (*bufp).set(buf)
         }
  
-       seq, ticksraw := tracestamp()
-       seqDiff := seq - buf.lastSeq
-       ticks := uint64(ticksraw) / traceTickDiv
+       ticks := uint64(cputicks()) / traceTickDiv
         tickDiff := ticks - buf.lastTicks
         if buf.pos == 0 {
                 buf.byte(traceEvBatch | 1<<traceArgCountShift)
                 buf.varint(uint64(pid))
-               buf.varint(seq)
                 buf.varint(ticks)
-               seqDiff = 0
                 tickDiff = 0
         }
-       buf.lastSeq = seq
         buf.lastTicks = ticks
         narg := byte(len(args))
         if skip >= 0 {
@@ -525,7 +506,6 @@ func traceEvent(ev byte, skip int, args ...uint64) {
                 buf.varint(0)
                 lenp = &buf.arr[buf.pos-1]
         }
-       buf.varint(seqDiff)
         buf.varint(tickDiff)
         for _, a := range args {
                 buf.varint(a)
@@ -892,7 +872,8 @@ func traceProcStop(pp *p) {
  }
  
  func traceGCStart() {
-       traceEvent(traceEvGCStart, 3)
+       traceEvent(traceEvGCStart, 3, trace.seqGC)
+       trace.seqGC++
  }
  
  func traceGCDone() {
@@ -916,13 +897,23 @@ func traceGCSweepDone() {
  }
  
  func traceGoCreate(newg *g, pc uintptr) {
+       newg.traceseq = 0
+       newg.tracelastp = getg().m.p
         // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
         id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum})
         traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id))
  }
  
  func traceGoStart() {
-       traceEvent(traceEvGoStart, -1, uint64(getg().m.curg.goid))
+       _g_ := getg().m.curg
+       _p_ := _g_.m.p
+       _g_.traceseq++
+       if _g_.tracelastp == _p_ {
+               traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid))
+       } else {
+               _g_.tracelastp = _p_
+               traceEvent(traceEvGoStart, -1, uint64(_g_.goid), _g_.traceseq)
+       }
  }
  
  func traceGoEnd() {
@@ -930,10 +921,14 @@ func traceGoEnd() {
  }
  
  func traceGoSched() {
+       _g_ := getg()
+       _g_.tracelastp = _g_.m.p
         traceEvent(traceEvGoSched, 1)
  }
  
  func traceGoPreempt() {
+       _g_ := getg()
+       _g_.tracelastp = _g_.m.p
         traceEvent(traceEvGoPreempt, 1)
  }
  
@@ -945,19 +940,37 @@ func traceGoPark(traceEv byte, skip int, gp *g) {
  }
  
  func traceGoUnpark(gp *g, skip int) {
-       traceEvent(traceEvGoUnblock, skip, uint64(gp.goid))
+       _p_ := getg().m.p
+       gp.traceseq++
+       if gp.tracelastp == _p_ {
+               traceEvent(traceEvGoUnblockLocal, skip, uint64(gp.goid))
+       } else {
+               gp.tracelastp = _p_
+               traceEvent(traceEvGoUnblock, skip, uint64(gp.goid), gp.traceseq)
+       }
  }
  
  func traceGoSysCall() {
         traceEvent(traceEvGoSysCall, 1)
  }
  
-func traceGoSysExit(seq uint64, ts int64) {
-       if int64(seq)-int64(trace.seqStart) < 0 {
-               // The timestamp was obtained during a previous tracing session, ignore.
-               return
-       }
-       traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
+func traceGoSysExit(ts int64) {
+       if ts != 0 && ts < trace.ticksStart {
+               // There is a race between the code that initializes sysexitticks
+               // (in exitsyscall, which runs without a P, and therefore is not
+               // stopped with the rest of the world) and the code that initializes
+               // a new trace. The recorded sysexitticks must therefore be treated
+               // as "best effort". If they are valid for this trace, then great,
+               // use them for greater accuracy. But if they're not valid for this
+               // trace, assume that the trace was started after the actual syscall
+               // exit (but before we actually managed to start the goroutine,
+               // aka right now), and assign a fresh time stamp to keep the log consistent.
+               ts = 0
+       }
+       _g_ := getg().m.curg
+       _g_.traceseq++
+       _g_.tracelastp = _g_.m.p
+       traceEvent(traceEvGoSysExit, -1, uint64(_g_.goid), _g_.traceseq, uint64(ts)/traceTickDiv)
  }
  
  func traceGoSysBlock(pp *p) {
author	Dmitry Vyukov <dvyukov@google.com>
	Tue, 5 Apr 2016 13:29:14 +0000 (15:29 +0200)
committer	Dmitry Vyukov <dvyukov@google.com>
	Sat, 23 Apr 2016 15:57:05 +0000 (15:57 +0000)
src/internal/trace/order.go	[new file with mode: 0644]	patch \| blob
src/internal/trace/parser.go		patch \| blob \| history
src/internal/trace/parser_test.go		patch \| blob \| history
src/runtime/proc.go		patch \| blob \| history
src/runtime/runtime2.go		patch \| blob \| history
src/runtime/trace.go		patch \| blob \| history