From 46b7557836c8a7f8a481946eebbf4a1b0947b332 Mon Sep 17 00:00:00 2001 From: Jeremy Faller Date: Wed, 9 Oct 2019 10:44:33 -0400 Subject: [PATCH] cmd/compile: walk progs to generate debug_lines data MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Walking the progs is simpler than using the is_stmt symbol shenanigans. This is a reinstatement of CL 196661, which was rolled back due to tests failing. Unlike that original CL, this change should output the same debug_lines data the original approach wrote. The stats for JUST this CLC, note teh small speedup in compilation, and the lack of difference in binary size. name old time/op new time/op delta Template 229ms ± 4% 218ms ± 1% -4.95% (p=0.000 n=10+8) Unicode 92.6ms ± 9% 88.6ms ±13% ~ (p=0.089 n=10+10) GoTypes 850ms ± 2% 831ms ± 4% -2.23% (p=0.009 n=10+10) Compiler 3.99s ± 1% 3.93s ± 1% -1.29% (p=0.000 n=10+9) SSA 13.7s ± 1% 13.7s ± 1% ~ (p=0.912 n=10+10) Flate 140ms ± 3% 138ms ± 3% -1.90% (p=0.009 n=10+10) GoParser 172ms ± 2% 169ms ± 4% ~ (p=0.095 n=9+10) Reflect 530ms ± 3% 516ms ± 5% ~ (p=0.052 n=10+10) Tar 202ms ± 1% 196ms ± 3% -2.83% (p=0.002 n=9+10) XML 280ms ± 3% 270ms ± 4% -3.48% (p=0.009 n=10+10) LinkCompiler 927ms ± 2% 907ms ± 4% ~ (p=0.052 n=10+10) ExternalLinkCompiler 1.97s ± 2% 1.97s ± 3% ~ (p=0.853 n=10+10) LinkWithoutDebugCompiler 549ms ± 3% 543ms ± 5% ~ (p=0.481 n=10+10) StdCmd 12.0s ± 1% 12.0s ± 1% ~ (p=0.905 n=9+10) name old user-time/op new user-time/op delta Template 372ms ±18% 344ms ±11% ~ (p=0.190 n=10+10) Unicode 264ms ±23% 241ms ±43% ~ (p=0.315 n=8+10) GoTypes 1.56s ±22% 1.68s ± 5% ~ (p=0.237 n=10+8) Compiler 7.41s ± 2% 7.31s ± 3% ~ (p=0.123 n=10+10) SSA 24.5s ± 2% 24.7s ± 1% ~ (p=0.133 n=10+9) Flate 199ms ± 6% 188ms ±28% ~ (p=0.353 n=10+10) GoParser 243ms ±11% 240ms ± 6% ~ (p=0.968 n=10+9) Reflect 929ms ±21% 862ms ±35% ~ (p=0.190 n=10+10) Tar 284ms ± 9% 296ms ±17% ~ (p=0.497 n=9+10) XML 386ms ±21% 398ms ±28% ~ (p=1.000 n=9+10) LinkCompiler 1.13s ± 9% 1.12s ± 8% ~ (p=0.546 n=9+9) ExternalLinkCompiler 2.37s ±15% 2.30s ± 9% ~ (p=0.549 n=10+9) LinkWithoutDebugCompiler 646ms ±10% 642ms ±13% ~ (p=0.853 n=10+10) name old alloc/op new alloc/op delta Template 36.5MB ± 0% 36.5MB ± 0% -0.11% (p=0.000 n=10+9) Unicode 28.5MB ± 0% 28.5MB ± 0% ~ (p=0.190 n=10+10) GoTypes 121MB ± 0% 121MB ± 0% -0.10% (p=0.000 n=9+10) Compiler 549MB ± 0% 549MB ± 0% -0.10% (p=0.000 n=9+10) SSA 1.92GB ± 0% 1.92GB ± 0% -0.13% (p=0.000 n=10+10) Flate 23.0MB ± 0% 23.0MB ± 0% -0.07% (p=0.000 n=10+10) GoParser 27.9MB ± 0% 27.9MB ± 0% -0.09% (p=0.000 n=10+10) Reflect 77.9MB ± 0% 77.8MB ± 0% -0.13% (p=0.000 n=9+10) Tar 34.5MB ± 0% 34.4MB ± 0% -0.09% (p=0.000 n=10+10) XML 44.3MB ± 0% 44.3MB ± 0% -0.08% (p=0.000 n=10+10) LinkCompiler 229MB ± 0% 225MB ± 0% -1.74% (p=0.000 n=10+10) ExternalLinkCompiler 233MB ± 0% 242MB ± 0% +3.81% (p=0.000 n=10+10) LinkWithoutDebugCompiler 156MB ± 0% 152MB ± 0% -2.29% (p=0.000 n=10+9) name old allocs/op new allocs/op delta Template 373k ± 0% 373k ± 0% -0.21% (p=0.000 n=10+10) Unicode 340k ± 0% 340k ± 0% -0.04% (p=0.000 n=10+10) GoTypes 1.33M ± 0% 1.33M ± 0% -0.20% (p=0.000 n=10+9) Compiler 5.39M ± 0% 5.38M ± 0% -0.16% (p=0.000 n=10+10) SSA 18.3M ± 0% 18.2M ± 0% -0.15% (p=0.000 n=10+10) Flate 235k ± 0% 234k ± 0% -0.23% (p=0.000 n=10+7) GoParser 309k ± 0% 308k ± 0% -0.20% (p=0.000 n=10+10) Reflect 970k ± 0% 968k ± 0% -0.30% (p=0.000 n=10+10) Tar 347k ± 0% 347k ± 0% -0.22% (p=0.000 n=10+10) XML 425k ± 0% 424k ± 0% -0.16% (p=0.000 n=10+10) LinkCompiler 602k ± 0% 601k ± 0% -0.03% (p=0.000 n=9+10) ExternalLinkCompiler 1.65M ± 0% 1.65M ± 0% -0.02% (p=0.000 n=10+10) LinkWithoutDebugCompiler 220k ± 0% 220k ± 0% -0.03% (p=0.016 n=10+9) name old object-bytes new object-bytes delta Template 553kB ± 0% 553kB ± 0% -0.01% (p=0.000 n=10+10) Unicode 215kB ± 0% 215kB ± 0% ~ (all equal) GoTypes 2.02MB ± 0% 2.02MB ± 0% -0.00% (p=0.000 n=10+10) Compiler 7.98MB ± 0% 7.98MB ± 0% -0.01% (p=0.000 n=10+10) SSA 27.1MB ± 0% 27.1MB ± 0% -0.00% (p=0.000 n=10+10) Flate 340kB ± 0% 340kB ± 0% -0.01% (p=0.000 n=10+10) GoParser 434kB ± 0% 434kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.34MB ± 0% 1.34MB ± 0% -0.01% (p=0.000 n=10+10) Tar 479kB ± 0% 479kB ± 0% -0.00% (p=0.000 n=10+10) XML 618kB ± 0% 618kB ± 0% -0.01% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 20.4kB ± 0% 20.4kB ± 0% ~ (all equal) Unicode 8.21kB ± 0% 8.21kB ± 0% ~ (all equal) GoTypes 36.6kB ± 0% 36.6kB ± 0% ~ (all equal) Compiler 116kB ± 0% 116kB ± 0% +0.00% (p=0.000 n=10+10) SSA 141kB ± 0% 141kB ± 0% +0.00% (p=0.000 n=10+10) Flate 5.10kB ± 0% 5.10kB ± 0% ~ (all equal) GoParser 8.92kB ± 0% 8.92kB ± 0% ~ (all equal) Reflect 11.8kB ± 0% 11.8kB ± 0% ~ (all equal) Tar 10.9kB ± 0% 10.9kB ± 0% ~ (all equal) XML 17.4kB ± 0% 17.4kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 742kB ± 0% 742kB ± 0% ~ (all equal) CmdGoSize 10.6MB ± 0% 10.6MB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 10.7kB ± 0% 10.7kB ± 0% ~ (all equal) CmdGoSize 312kB ± 0% 312kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 122kB ± 0% 122kB ± 0% ~ (all equal) CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.10MB ± 0% 1.10MB ± 0% ~ (all equal) CmdGoSize 14.9MB ± 0% 14.9MB ± 0% -0.03% (p=0.000 n=10+10) Change-Id: Ie078a42b29353b96654fa1f0f47d600b5a53762d Reviewed-on: https://go-review.googlesource.com/c/go/+/200017 Reviewed-by: Jeremy Faller Reviewed-by: David Chase Run-TryBot: Jeremy Faller TryBot-Result: Gobot Gobot --- src/cmd/internal/obj/dwarf.go | 104 ++++++++++++---------------------- src/cmd/internal/obj/pcln.go | 34 ----------- 2 files changed, 36 insertions(+), 102 deletions(-) diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index d8f3de3b69..4118c6442c 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -8,6 +8,7 @@ package obj import ( "cmd/internal/dwarf" + "cmd/internal/src" "fmt" ) @@ -48,78 +49,45 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // generating the line table. See below. // TODO: Once delve can support multiple DW_LNS_end_statements, we don't have // to do this. - is_stmt := uint8(1) + stmt := true + line := int64(1) pc := s.Func.Text.Pc - line := 1 - file := 1 - - // The linker will insert the DW_LNE_set_address once determined; therefore, - // it's omitted here. - - // Generate the actual line information. - // We use the pcline and pcfile to generate this section, and it's suboptimal. - // Likely better would be to generate this dirrectly from the progs and not - // parse those tables. - // TODO: Generate from the progs if it's faster. - pcfile := NewPCIter(uint32(ctxt.Arch.Arch.MinLC)) - pcline := NewPCIter(uint32(ctxt.Arch.Arch.MinLC)) - pcstmt := NewPCIter(uint32(ctxt.Arch.Arch.MinLC)) - pcfile.Init(s.Func.Pcln.Pcfile.P) - pcline.Init(s.Func.Pcln.Pcline.P) - var pctostmtData Pcdata - funcpctab(ctxt, &pctostmtData, s, "pctostmt", pctostmt, nil) - pcstmt.Init(pctostmtData.P) - var thispc uint32 - - for !pcfile.Done && !pcline.Done { - // Only changed if it advanced - if int32(file) != pcfile.Value { - dctxt.AddUint8(lines, dwarf.DW_LNS_set_file) - dwarf.Uleb128put(dctxt, lines, fileNums[pcfile.Value]) - file = int(pcfile.Value) + name := "" + prologue, wrotePrologue := false, false + // Walk the progs, generating the DWARF table. + for p := s.Func.Text; p != nil; p = p.Link { + prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd) + // If we're not at a real instruction, keep looping! + if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) { + continue } + newStmt := p.Pos.IsStmt() != src.PosNotStmt + newName, newLine := linkgetlineFromPos(ctxt, p.Pos) - // Only changed if it advanced - if is_stmt != uint8(pcstmt.Value) { - new_stmt := uint8(pcstmt.Value) - switch new_stmt &^ 1 { - case PrologueEnd: - dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end)) - case EpilogueBegin: - // TODO if there is a use for this, add it. - // Don't forget to increase OPCODE_BASE by 1 and add entry for standard_opcode_lengths[11] - panic("unsupported EpilogueBegin") - } - new_stmt &= 1 - if is_stmt != new_stmt { - is_stmt = new_stmt - dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt)) - } + // Output debug info. + wrote := false + if name != newName { + newFile := ctxt.PosTable.FileIndex(newName) + 1 // 1 indexing for the table. + dctxt.AddUint8(lines, dwarf.DW_LNS_set_file) + dwarf.Uleb128put(dctxt, lines, int64(newFile)) + name = newName + wrote = true } - - // putpcldelta makes a row in the DWARF matrix, always, even if line is unchanged. - putpclcdelta(ctxt, dctxt, lines, uint64(s.Func.Text.Pc+int64(thispc)-pc), int64(pcline.Value)-int64(line)) - - pc = s.Func.Text.Pc + int64(thispc) - line = int(pcline.Value) - - // Take the minimum step forward for the three iterators - thispc = pcfile.NextPC - if pcline.NextPC < thispc { - thispc = pcline.NextPC + if prologue && !wrotePrologue { + dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end)) + wrotePrologue = true + wrote = true } - if !pcstmt.Done && pcstmt.NextPC < thispc { - thispc = pcstmt.NextPC + if stmt != newStmt { + dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt)) + stmt = newStmt + wrote = true } - if pcfile.NextPC == thispc { - pcfile.Next() - } - if !pcstmt.Done && pcstmt.NextPC == thispc { - pcstmt.Next() - } - if pcline.NextPC == thispc { - pcline.Next() + if line != int64(newLine) || wrote { + pcdelta := p.Pc - pc + putpclcdelta(ctxt, dctxt, lines, uint64(pcdelta), int64(newLine)-line) + line, pc = int64(newLine), p.Pc } } @@ -129,16 +97,16 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // file = 1 // line = 1 // column = 0 - // is_stmt = set in header, we assume true + // stmt = set in header, we assume true // basic_block = false // Careful readers of the DWARF specification will note that we don't reset // the address of the state machine -- but this will happen at the beginning - // of the NEXT block of opcodes. (See the SetAddress call above.) + // of the NEXT block of opcodes. dctxt.AddUint8(lines, dwarf.DW_LNS_set_file) dwarf.Uleb128put(dctxt, lines, 1) dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line) dwarf.Sleb128put(dctxt, lines, int64(1-line)) - if is_stmt != 1 { + if !stmt { dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt) } dctxt.AddUint8(lines, dwarf.DW_LNS_copy) diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index ca1eda8d1e..58552b9299 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -5,16 +5,10 @@ package obj import ( - "cmd/internal/src" "encoding/binary" "log" ) -const ( - PrologueEnd = 2 + iota // overload "is_stmt" to include prologue_end - EpilogueBegin // overload "is_stmt" to include epilogue_end -) - // funcpctab writes to dst a pc-value table mapping the code in func to the values // returned by valfunc parameterized by arg. The invocation of valfunc to update the // current value is, for each p, @@ -249,34 +243,6 @@ func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg in return oldval + p.Spadj } -// pctostmt returns either, -// if phase==0, then whether the current instruction is a step-target (Dwarf is_stmt) -// bit-or'd with whether the current statement is a prologue end or epilogue begin -// else (phase == 1), zero. -// -func pctostmt(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 { - if phase == 1 { - return 0 // Ignored; also different from initial value of -1, if that ever matters. - } - s := p.Pos.IsStmt() - l := p.Pos.Xlogue() - - var is_stmt int32 - - // PrologueEnd, at least, is passed to the next instruction - switch l { - case src.PosPrologueEnd: - is_stmt = PrologueEnd - case src.PosEpilogueBegin: - is_stmt = EpilogueBegin - } - - if s != src.PosNotStmt { - is_stmt |= 1 // either PosDefaultStmt from asm, or PosIsStmt from go - } - return is_stmt -} - // pctopcdata computes the pcdata value in effect at p. // A PCDATA instruction sets the value in effect at future // non-PCDATA instructions. -- 2.48.1