]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: walk the progs to generate debug_lines
authorJeremy Faller <jeremy@golang.org>
Fri, 20 Sep 2019 18:02:24 +0000 (14:02 -0400)
committerJeremy Faller <jeremy@golang.org>
Thu, 3 Oct 2019 14:09:40 +0000 (14:09 +0000)
Rather than use the pcln tables, walk progs while generating
debug_lines.  This code slightly increases the number of is_stmt toggles
in the debug information due to a previous bug in how the pcline walking
worked. (Previous versions of the line walking code wouldn't return the
old_value, instead returning 0. This behavior might lose is_stmt toggles
in the line table.)

We suspected there would be a speedup with this change, but benchmarking
hasn't shown this to be true (but has been noisy enough to not really
show any large differences either way). These benchmarks are comparing
non-prog walking code with this prog-walking code:

name                      old time/op       new time/op       delta
Template                        321ms ± 4%        316ms ± 3%     ~     (p=0.165 n=10+10)
Unicode                         146ms ± 5%        142ms ± 4%     ~     (p=0.063 n=10+10)
GoTypes                         1.06s ± 2%        1.07s ± 2%     ~     (p=0.280 n=10+10)
Compiler                        4.07s ± 1%        4.06s ± 1%     ~     (p=0.549 n=10+9)
SSA                             12.6s ± 2%        12.7s ± 2%   +1.27%  (p=0.019 n=10+10)
Flate                           201ms ± 7%        202ms ± 4%     ~     (p=0.436 n=10+10)
GoParser                        248ms ± 4%        250ms ± 2%     ~     (p=0.356 n=9+10)
Reflect                         679ms ± 5%        678ms ± 4%     ~     (p=0.971 n=10+10)
Tar                             281ms ± 2%        283ms ± 3%     ~     (p=0.222 n=9+9)
XML                             381ms ± 3%        384ms ± 5%     ~     (p=0.393 n=10+10)
LinkCompiler                    1.08s ± 2%        1.10s ± 2%   +1.89%  (p=0.009 n=10+10)
ExternalLinkCompiler            2.23s ± 4%        2.23s ± 1%     ~     (p=1.000 n=10+8)
LinkWithoutDebugCompiler        654ms ± 4%        673ms ± 4%   +2.94%  (p=0.019 n=10+10)
StdCmd                          13.6s ± 2%        13.9s ± 1%   +2.00%  (p=0.000 n=10+10)

name                      old user-time/op  new user-time/op  delta
Template                        582ms ±11%        575ms ±14%     ~     (p=0.631 n=10+10)
Unicode                         431ms ±24%        390ms ±38%     ~     (p=0.315 n=10+10)
GoTypes                         2.47s ±11%        2.51s ± 4%     ~     (p=0.280 n=10+10)
Compiler                        9.09s ± 3%        9.04s ± 5%     ~     (p=0.684 n=10+10)
SSA                             25.8s ± 4%        26.0s ± 3%     ~     (p=0.529 n=10+10)
Flate                           318ms ±14%        322ms ±13%     ~     (p=0.912 n=10+10)
GoParser                        386ms ± 6%        386ms ± 5%     ~     (p=0.888 n=9+8)
Reflect                         1.42s ±20%        1.32s ±24%     ~     (p=0.393 n=10+10)
Tar                             476ms ±19%        471ms ±25%     ~     (p=1.000 n=10+10)
XML                             681ms ±25%        745ms ±21%     ~     (p=0.143 n=10+10)
LinkCompiler                    1.75s ±13%        1.86s ±12%     ~     (p=0.075 n=10+10)
ExternalLinkCompiler            2.98s ±18%        3.41s ±13%  +14.48%  (p=0.003 n=10+10)
LinkWithoutDebugCompiler        1.05s ±12%        1.08s ±16%     ~     (p=0.739 n=10+10)

name                      old alloc/op      new alloc/op      delta
Template                       36.4MB ± 0%       36.4MB ± 0%   -0.11%  (p=0.000 n=10+10)
Unicode                        28.6MB ± 0%       28.5MB ± 0%   -0.06%  (p=0.029 n=10+10)
GoTypes                         121MB ± 0%        121MB ± 0%   -0.09%  (p=0.000 n=9+9)
Compiler                        548MB ± 0%        547MB ± 0%   -0.10%  (p=0.000 n=10+10)
SSA                            1.87GB ± 0%       1.87GB ± 0%   -0.13%  (p=0.000 n=10+10)
Flate                          23.0MB ± 0%       22.9MB ± 0%   -0.09%  (p=0.000 n=9+10)
GoParser                       27.9MB ± 0%       27.8MB ± 0%   -0.12%  (p=0.000 n=10+10)
Reflect                        77.7MB ± 0%       77.6MB ± 0%   -0.12%  (p=0.000 n=8+10)
Tar                            34.5MB ± 0%       34.5MB ± 0%   -0.07%  (p=0.003 n=10+10)
XML                            44.4MB ± 0%       44.4MB ± 0%   -0.07%  (p=0.000 n=10+10)
LinkCompiler                    236MB ± 0%        240MB ± 0%   +1.72%  (p=0.000 n=10+10)
ExternalLinkCompiler            246MB ± 0%        254MB ± 0%   +3.02%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler        159MB ± 0%        164MB ± 0%   +3.35%  (p=0.000 n=10+10)

name                      old allocs/op     new allocs/op     delta
Template                         372k ± 0%         371k ± 0%   -0.23%  (p=0.000 n=10+10)
Unicode                          340k ± 0%         340k ± 0%   -0.05%  (p=0.000 n=10+10)
GoTypes                         1.33M ± 0%        1.32M ± 0%   -0.20%  (p=0.000 n=9+10)
Compiler                        5.37M ± 0%        5.36M ± 0%   -0.17%  (p=0.000 n=10+10)
SSA                             17.9M ± 0%        17.9M ± 0%   -0.15%  (p=0.000 n=10+10)
Flate                            234k ± 0%         233k ± 0%   -0.24%  (p=0.000 n=9+10)
GoParser                         309k ± 0%         309k ± 0%   -0.21%  (p=0.000 n=10+10)
Reflect                          969k ± 0%         966k ± 0%   -0.30%  (p=0.000 n=9+10)
Tar                              348k ± 0%         347k ± 0%   -0.22%  (p=0.000 n=10+9)
XML                              426k ± 0%         425k ± 0%   -0.15%  (p=0.000 n=9+10)
LinkCompiler                     638k ± 0%         637k ± 0%   -0.07%  (p=0.000 n=10+10)
ExternalLinkCompiler            1.69M ± 0%        1.69M ± 0%   -0.05%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler         222k ± 0%         221k ± 0%   -0.03%  (p=0.007 n=10+9)

name                      old object-bytes  new object-bytes  delta
Template                        559kB ± 0%        560kB ± 0%   +0.23%  (p=0.000 n=10+10)
Unicode                         216kB ± 0%        216kB ± 0%   +0.01%  (p=0.000 n=10+10)
GoTypes                        2.03MB ± 0%       2.04MB ± 0%   +0.31%  (p=0.000 n=10+10)
Compiler                       8.07MB ± 0%       8.10MB ± 0%   +0.35%  (p=0.000 n=10+10)
SSA                            27.1MB ± 0%       27.3MB ± 0%   +0.72%  (p=0.000 n=10+10)
Flate                           343kB ± 0%        344kB ± 0%   +0.22%  (p=0.000 n=10+10)
GoParser                        441kB ± 0%        442kB ± 0%   +0.34%  (p=0.000 n=10+10)
Reflect                        1.36MB ± 0%       1.36MB ± 0%   +0.23%  (p=0.000 n=10+10)
Tar                             487kB ± 0%        488kB ± 0%   +0.21%  (p=0.000 n=10+10)
XML                             632kB ± 0%        634kB ± 0%   +0.35%  (p=0.000 n=10+10)

name                      old export-bytes  new export-bytes  delta
Template                       18.5kB ± 0%       18.5kB ± 0%     ~     (all equal)
Unicode                        7.92kB ± 0%       7.92kB ± 0%     ~     (all equal)
GoTypes                        35.0kB ± 0%       35.0kB ± 0%     ~     (all equal)
Compiler                        109kB ± 0%        109kB ± 0%   +0.00%  (p=0.000 n=10+10)
SSA                             137kB ± 0%        137kB ± 0%   +0.00%  (p=0.000 n=10+10)
Flate                          4.89kB ± 0%       4.89kB ± 0%     ~     (all equal)
GoParser                       8.49kB ± 0%       8.49kB ± 0%     ~     (all equal)
Reflect                        11.4kB ± 0%       11.4kB ± 0%     ~     (all equal)
Tar                            10.5kB ± 0%       10.5kB ± 0%     ~     (all equal)
XML                            16.7kB ± 0%       16.7kB ± 0%     ~     (all equal)

name                      old text-bytes    new text-bytes    delta
HelloSize                       760kB ± 0%        760kB ± 0%     ~     (all equal)
CmdGoSize                      10.8MB ± 0%       10.8MB ± 0%     ~     (all equal)

name                      old data-bytes    new data-bytes    delta
HelloSize                      10.7kB ± 0%       10.7kB ± 0%     ~     (all equal)
CmdGoSize                       312kB ± 0%        312kB ± 0%     ~     (all equal)

name                      old bss-bytes     new bss-bytes     delta
HelloSize                       122kB ± 0%        122kB ± 0%     ~     (all equal)
CmdGoSize                       146kB ± 0%        146kB ± 0%     ~     (all equal)

name                      old exe-bytes     new exe-bytes     delta
HelloSize                      1.13MB ± 0%       1.13MB ± 0%     ~     (all equal)
CmdGoSize                      15.0MB ± 0%       15.1MB ± 0%   +0.22%  (p=0.000 n=10+10)

Change-Id: If6e0982cd1398062a88e6c0c7513e141f9503531
Reviewed-on: https://go-review.googlesource.com/c/go/+/196661
Run-TryBot: Jeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/internal/obj/dwarf.go
src/cmd/internal/obj/pcln.go

index d8f3de3b692c3daf0b44289287f5eac4cfee31d0..5efafe11b86cb2d9206e09c25733b4863e750949 100644 (file)
@@ -8,6 +8,7 @@ package obj
 
 import (
        "cmd/internal/dwarf"
+       "cmd/internal/src"
        "fmt"
 )
 
@@ -31,95 +32,51 @@ const (
 func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
        dctxt := dwCtxt{ctxt}
 
-       // The Pcfile table is used to generate the debug_lines section, and the file
-       // indices for that data could differ from the files we write out for the
-       // debug_lines section. Here we generate a LUT between those two indices.
-       fileNums := make(map[int32]int64)
-       for i, filename := range s.Func.Pcln.File {
-               if symbolIndex := ctxt.PosTable.FileIndex(filename); symbolIndex >= 0 {
-                       fileNums[int32(i)] = int64(symbolIndex) + 1
-               } else {
-                       panic(fmt.Sprintf("First time we've seen filename: %q", filename))
-               }
-       }
-
        // Set up the debug_lines state machine.
        // NB: This state machine is reset to this state when we've finished
        // generating the line table. See below.
        // TODO: Once delve can support multiple DW_LNS_end_statements, we don't have
        // to do this.
-       is_stmt := uint8(1)
+       stmt := true
+       line := int64(1)
        pc := s.Func.Text.Pc
-       line := 1
-       file := 1
-
-       // The linker will insert the DW_LNE_set_address once determined; therefore,
-       // it's omitted here.
-
-       // Generate the actual line information.
-       // We use the pcline and pcfile to generate this section, and it's suboptimal.
-       // Likely better would be to generate this dirrectly from the progs and not
-       // parse those tables.
-       // TODO: Generate from the progs if it's faster.
-       pcfile := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
-       pcline := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
-       pcstmt := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
-       pcfile.Init(s.Func.Pcln.Pcfile.P)
-       pcline.Init(s.Func.Pcln.Pcline.P)
-       var pctostmtData Pcdata
-       funcpctab(ctxt, &pctostmtData, s, "pctostmt", pctostmt, nil)
-       pcstmt.Init(pctostmtData.P)
-       var thispc uint32
-
-       for !pcfile.Done && !pcline.Done {
-               // Only changed if it advanced
-               if int32(file) != pcfile.Value {
-                       dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
-                       dwarf.Uleb128put(dctxt, lines, fileNums[pcfile.Value])
-                       file = int(pcfile.Value)
+       name := ""
+       prologue, wrotePrologue := false, false
+
+       // Walk the progs, generating the DWARF table.
+       for p := s.Func.Text; p != nil; p = p.Link {
+               prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd)
+               // If we're not at a real instruction, keep looping!
+               if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == pc) {
+                       continue
                }
+               newStmt := p.Pos.IsStmt() != src.PosNotStmt
+               newName, newLine := linkgetlineFromPos(ctxt, p.Pos)
 
-               // Only changed if it advanced
-               if is_stmt != uint8(pcstmt.Value) {
-                       new_stmt := uint8(pcstmt.Value)
-                       switch new_stmt &^ 1 {
-                       case PrologueEnd:
-                               dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
-                       case EpilogueBegin:
-                               // TODO if there is a use for this, add it.
-                               // Don't forget to increase OPCODE_BASE by 1 and add entry for standard_opcode_lengths[11]
-                               panic("unsupported EpilogueBegin")
-                       }
-                       new_stmt &= 1
-                       if is_stmt != new_stmt {
-                               is_stmt = new_stmt
-                               dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
-                       }
+               // Output debug info.
+               wrote := false
+               if name != newName {
+                       newFile := ctxt.PosTable.FileIndex(newName) + 1 // 1 indexing for the table.
+                       dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
+                       dwarf.Uleb128put(dctxt, lines, int64(newFile))
+                       name = newName
+                       wrote = true
                }
-
-               // putpcldelta makes a row in the DWARF matrix, always, even if line is unchanged.
-               putpclcdelta(ctxt, dctxt, lines, uint64(s.Func.Text.Pc+int64(thispc)-pc), int64(pcline.Value)-int64(line))
-
-               pc = s.Func.Text.Pc + int64(thispc)
-               line = int(pcline.Value)
-
-               // Take the minimum step forward for the three iterators
-               thispc = pcfile.NextPC
-               if pcline.NextPC < thispc {
-                       thispc = pcline.NextPC
+               if prologue && !wrotePrologue {
+                       dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
+                       wrotePrologue = true
+                       wrote = true
                }
-               if !pcstmt.Done && pcstmt.NextPC < thispc {
-                       thispc = pcstmt.NextPC
+               if stmt != newStmt {
+                       dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
+                       stmt = newStmt
+                       wrote = true
                }
 
-               if pcfile.NextPC == thispc {
-                       pcfile.Next()
-               }
-               if !pcstmt.Done && pcstmt.NextPC == thispc {
-                       pcstmt.Next()
-               }
-               if pcline.NextPC == thispc {
-                       pcline.Next()
+               if line != int64(newLine) || wrote {
+                       pcdelta := (p.Pc - pc) / int64(ctxt.Arch.MinLC)
+                       putpclcdelta(ctxt, dctxt, lines, uint64(pcdelta), int64(newLine)-line)
+                       line, pc = int64(newLine), p.Pc
                }
        }
 
@@ -129,16 +86,16 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
        //   file = 1
        //   line = 1
        //   column = 0
-       //   is_stmt = set in header, we assume true
+       //   stmt = set in header, we assume true
        //   basic_block = false
        // Careful readers of the DWARF specification will note that we don't reset
        // the address of the state machine -- but this will happen at the beginning
-       // of the NEXT block of opcodes. (See the SetAddress call above.)
+       // of the NEXT block of opcodes.
        dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
        dwarf.Uleb128put(dctxt, lines, 1)
        dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line)
        dwarf.Sleb128put(dctxt, lines, int64(1-line))
-       if is_stmt != 1 {
+       if !stmt {
                dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt)
        }
        dctxt.AddUint8(lines, dwarf.DW_LNS_copy)
index ca1eda8d1ecfbeb5cf6de7e90a396173b3652398..c47897a263ff8e4b634fb556d7973a8e821062ca 100644 (file)
@@ -5,7 +5,6 @@
 package obj
 
 import (
-       "cmd/internal/src"
        "encoding/binary"
        "log"
 )
@@ -249,34 +248,6 @@ func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg in
        return oldval + p.Spadj
 }
 
-// pctostmt returns either,
-// if phase==0, then whether the current instruction is a step-target (Dwarf is_stmt)
-//     bit-or'd with whether the current statement is a prologue end or epilogue begin
-// else (phase == 1), zero.
-//
-func pctostmt(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 {
-       if phase == 1 {
-               return 0 // Ignored; also different from initial value of -1, if that ever matters.
-       }
-       s := p.Pos.IsStmt()
-       l := p.Pos.Xlogue()
-
-       var is_stmt int32
-
-       // PrologueEnd, at least, is passed to the next instruction
-       switch l {
-       case src.PosPrologueEnd:
-               is_stmt = PrologueEnd
-       case src.PosEpilogueBegin:
-               is_stmt = EpilogueBegin
-       }
-
-       if s != src.PosNotStmt {
-               is_stmt |= 1 // either PosDefaultStmt from asm, or PosIsStmt from go
-       }
-       return is_stmt
-}
-
 // pctopcdata computes the pcdata value in effect at p.
 // A PCDATA instruction sets the value in effect at future
 // non-PCDATA instructions.