From: Robert Griesemer Date: Thu, 14 Apr 2016 00:53:03 +0000 (-0700) Subject: cmd/compile: first cut at exporting position info X-Git-Tag: go1.7beta1~667 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=5c593a3227d97f5d2afa66a39b6dd8ea6ebf73f3;p=gostls13.git cmd/compile: first cut at exporting position info - position info for all exported globals, plus methods and fields - use delta-encoded line number info in most cases - canonicalize all strings: each filename appears only once, but will also compact other strings (names) to at most one occurence in encoding - positions not yet hooked up when reading in Also: - adjusted go/importer (gcimporter) - some refactoring for better symmetry Stats: - comparison of export data size w/o and w/ position info (bytes). - delta is increase in % - overall (see bottom of table): 14% increase - however, the current binary format decreased from the original binary format last week by 14% - compared to original textual format: 65% decrease (increase by 14% after decrease by 14% still leads to a decrease from original textual format) (caveat: we used the textual size from last week, assuming it has not changed - there may be a small error here). package w/o pos w/ pos delta archive/tar 4234 4902 16% archive/zip 6387 7340 15% bufio 3106 3419 10% bytes 4362 4757 9% cmd/addr2line 27 70 159% cmd/api 12065 13590 13% cmd/asm 27 64 137% cmd/asm/internal/arch 9957 11529 16% cmd/asm/internal/asm 11788 13385 14% cmd/asm/internal/flags 239 311 30% cmd/asm/internal/lex 13415 15358 14% cmd/cgo 13064 15006 15% cmd/compile 27 67 148% cmd/compile/internal/amd64 461 869 89% cmd/compile/internal/arm 5963 7273 22% cmd/compile/internal/arm64 363 657 81% cmd/compile/internal/big 7186 8590 20% cmd/compile/internal/gc 48242 56234 17% cmd/compile/internal/mips64 367 666 81% cmd/compile/internal/ppc64 372 721 94% cmd/compile/internal/s390x 330 569 72% cmd/compile/internal/ssa 30464 35058 15% cmd/compile/internal/x86 429 770 79% cmd/cover 3984 4731 19% cmd/dist 74 154 108% cmd/doc 7272 8591 18% cmd/expdump 27 71 163% cmd/fix 342 419 23% cmd/go 8126 9520 17% cmd/gofmt 27 70 159% cmd/gofmt2 27 69 156% cmd/gofmt2/internal/format 702 856 22% cmd/gofmt2/internal/lexical 2954 3509 19% cmd/gofmt2/internal/parse 6185 7295 18% cmd/gofmt2/internal/syntax 3533 4738 34% cmd/gofmt2/internal/test 540 615 14% cmd/internal/bio 5395 6060 12% cmd/internal/gcprog 533 663 24% cmd/internal/goobj 1022 1277 25% cmd/internal/obj 10951 12825 17% cmd/internal/obj/arm 8612 9985 16% cmd/internal/obj/arm64 15814 17638 12% cmd/internal/obj/mips 10928 12487 14% cmd/internal/obj/ppc64 13576 15277 13% cmd/internal/obj/s390x 16513 18708 13% cmd/internal/obj/x86 21152 23482 11% cmd/internal/objfile 14442 16505 14% cmd/internal/pprof/commands 1663 1885 13% cmd/internal/pprof/driver 9517 10789 13% cmd/internal/pprof/fetch 7632 8635 13% cmd/internal/pprof/plugin 13150 14809 13% cmd/internal/pprof/profile 7004 8248 18% cmd/internal/pprof/report 7763 8942 15% cmd/internal/pprof/svg 1332 1534 15% cmd/internal/pprof/symbolizer 7376 8439 14% cmd/internal/pprof/symbolz 6970 7976 14% cmd/internal/pprof/tempfile 3645 4093 12% cmd/internal/sys 505 619 23% cmd/internal/unvendor/golang.org/x/arch/arm/armasm 73951 79188 7% cmd/internal/unvendor/golang.org/x/arch/x86/x86asm 10140 11738 16% cmd/link 27 64 137% cmd/link/internal/amd64 9317 11034 18% cmd/link/internal/arm 110 213 94% cmd/link/internal/arm64 112 219 96% cmd/link/internal/ld 53524 60149 12% cmd/link/internal/mips64 113 222 96% cmd/link/internal/ppc64 113 220 95% cmd/link/internal/s390x 112 219 96% cmd/link/internal/x86 110 212 93% cmd/nm 27 61 126% cmd/objdump 27 68 152% cmd/pack 4141 4688 13% cmd/pprof 27 67 148% cmd/trace 624 842 35% cmd/vet 11194 13140 17% cmd/vet/internal/whitelist 52 113 117% cmd/yacc 1141 1317 15% compress/bzip2 2101 2484 18% compress/flate 3619 4336 20% compress/gzip 6261 7111 14% compress/lzw 276 401 45% compress/zlib 3630 4158 15% container/heap 187 250 34% container/list 1370 1506 10% container/ring 466 546 17% context 3005 3338 11% crypto 728 856 18% crypto/aes 181 321 77% crypto/cipher 744 1163 56% crypto/des 220 320 45% crypto/dsa 4526 4990 10% crypto/ecdsa 5341 5982 12% crypto/elliptic 4969 5593 13% crypto/hmac 188 250 33% crypto/md5 560 706 26% crypto/rand 4218 4746 13% crypto/rc4 214 321 50% crypto/rsa 5648 6355 13% crypto/sha1 597 751 26% crypto/sha256 228 351 54% crypto/sha512 354 484 37% crypto/subtle 586 621 6% crypto/tls 20909 23438 12% crypto/x509 14862 16857 13% crypto/x509/pkix 8384 9278 11% database/sql 6721 7715 15% database/sql/driver 1243 1535 23% debug/dwarf 7867 9153 16% debug/elf 25479 28025 10% debug/gosym 1887 2267 20% debug/macho 7222 8846 22% debug/pe 6921 8081 17% debug/plan9obj 1084 1319 22% encoding 217 280 29% encoding/ascii85 587 722 23% encoding/asn1 1043 1268 22% encoding/base32 929 1112 20% encoding/base64 1166 1368 17% encoding/binary 2168 2410 11% encoding/csv 3761 4203 12% encoding/gob 11304 12936 14% encoding/hex 510 606 19% encoding/json 9965 11395 14% encoding/pem 202 266 32% encoding/xml 11817 13361 13% errors 126 170 35% expvar 930 1142 23% flag 5905 6519 10% fmt 1027 1190 16% go/ast 12910 15541 20% go/build 5460 6173 13% go/constant 1645 1816 10% go/doc 3107 3882 25% go/format 1416 1729 22% go/importer 1426 1668 17% go/internal/gccgoimporter 1624 2028 25% go/internal/gcimporter 2650 3095 17% go/parser 6220 7073 14% go/printer 1924 2306 20% go/scanner 3137 3602 15% go/token 3053 3474 14% go/types 21793 25561 17% hash 234 327 40% hash/adler32 465 553 19% hash/crc32 668 817 22% hash/crc64 630 727 15% hash/fnv 1413 1582 12% html 76 114 50% html/template 14382 16457 14% image 10248 11409 11% image/color 2247 2562 14% image/color/palette 107 169 58% image/draw 2313 2494 8% image/gif 3079 3450 12% image/internal/imageutil 3136 3456 10% image/jpeg 2349 2735 16% image/png 2404 2695 12% index/suffixarray 4978 5596 12% internal/race 225 278 24% internal/singleflight 551 697 26% internal/syscall/windows/sysdll 97 166 71% internal/testenv 4488 5052 13% internal/trace 1392 1680 21% io 2811 3318 18% io/ioutil 3988 4467 12% log 3532 3907 11% log/syslog 4247 4775 12% math 3021 4499 49% math/big 7250 8456 17% math/cmplx 1034 1617 56% math/rand 734 885 21% mime 1889 2194 16% mime/multipart 4313 4849 12% mime/quotedprintable 1758 1996 14% net 15686 18617 19% net/http 42182 47848 13% net/http/cgi 19496 21768 12% net/http/cookiejar 4615 5248 14% net/http/fcgi 17758 19771 11% net/http/httptest 26108 29350 12% net/http/httputil 20732 23286 12% net/http/internal 2195 2497 14% net/http/pprof 17596 19545 11% net/internal/socktest 1689 2153 27% net/mail 4328 4810 11% net/rpc 24328 27249 12% net/rpc/jsonrpc 11052 12438 13% net/smtp 17127 19174 12% net/textproto 3705 4329 17% net/url 1193 1371 15% os 8493 10113 19% os/exec 6625 7532 14% os/signal 137 236 72% os/user 529 761 44% path 295 372 26% path/filepath 3452 3952 14% reflect 5091 6028 18% regexp 4848 5585 15% regexp/syntax 2590 3076 19% runtime 8721 11598 33% runtime/cgo 17 17 0% runtime/debug 2721 3130 15% runtime/internal/atomic 569 704 24% runtime/internal/sys 1874 2318 24% runtime/pprof 478 582 22% runtime/race 18 18 0% runtime/trace 95 146 54% sort 1052 1215 15% strconv 1389 1667 20% strings 3372 3772 12% sync 946 1371 45% sync/atomic 962 1079 12% syscall 41574 45613 10% testing 6184 7243 17% testing/iotest 883 1116 26% testing/quick 4659 5443 17% text/scanner 2930 3269 12% text/tabwriter 2333 2607 12% text/template 13335 15274 15% text/template/parse 8270 9285 12% time 4687 5313 13% unicode 3831 4355 14% unicode/utf16 530 584 10% unicode/utf8 872 946 8% vendor/golang.org/x/net/http2/hpack 3386 3970 17% 1295440 1481566 14% orig. textual 4253585 1481566 -65% orig. binary 1724071 1481566 -14% Change-Id: I4177c6511cc57ebe5eb80c89bf3aefc83376ce86 Reviewed-on: https://go-review.googlesource.com/22096 Reviewed-by: Matthew Dempsky --- diff --git a/src/cmd/compile/internal/gc/bexport.go b/src/cmd/compile/internal/gc/bexport.go index e5fa3c39a6..eee71291be 100644 --- a/src/cmd/compile/internal/gc/bexport.go +++ b/src/cmd/compile/internal/gc/bexport.go @@ -36,25 +36,21 @@ If the field is a pointer to another object, that object is serialized, recursively. Otherwise the field is written. Non-pointer fields are all encoded as integer or string values. -Only packages and types may be referred to more than once. When getting -to a package or type that was not serialized before, an integer _index_ +Some objects (packages, types) may be referred to more than once. When +reaching an object that was not serialized before, an integer _index_ is assigned to it, starting at 0. In this case, the encoding starts with an integer _tag_ < 0. The tag value indicates the kind of object -(package or type) that follows and that this is the first time that we -see this object. If the package or tag was already serialized, the encoding -starts with the respective package or type index >= 0. An importer can -trivially determine if a package or type needs to be read in for the first -time (tag < 0) and entered into the respective package or type table, or -if the package or type was seen already (index >= 0), in which case the -index is used to look up the object in a table. +that follows and that this is the first time that we see this object. +If the object was already serialized, the encoding is simply the object +index >= 0. An importer can trivially determine if an object needs to +be read in for the first time (tag < 0) and entered into the respective +object table, or if the object was seen already (index >= 0), in which +case the index is used to look up the object in a table. Before exporting or importing, the type tables are populated with the predeclared types (int, string, error, unsafe.Pointer, etc.). This way they are automatically encoded with a known and fixed type index. -TODO(gri) We may consider using the same sharing for other items -that are written out, such as strings, or possibly symbols (*Sym). - Encoding format: The export data starts with a single byte indicating the encoding format @@ -73,11 +69,17 @@ the previously imported type pointer so that we have exactly one version (i.e., one pointer) for each named type (and read but discard the current type encoding). Unnamed types simply encode their respective fields. -In the encoding, some lists start with the list length (incl. strings). -Some lists are terminated with an end marker (usually for lists where -we may not know the length a priori). +In the encoding, some lists start with the list length. Some lists are +terminated with an end marker (usually for lists where we may not know +the length a priori). + +Integers use variable-length encoding for compact representation. -All integer values use variable-length encoding for compact representation. +Strings are canonicalized similar to objects that may occur multiple times: +If the string was exported already, it is represented by its index only. +Otherwise, the export data starts with the negative string length (negative, +so we can distinguish from string index), followed by the string bytes. +The empty string is mapped to index 0. The exporter and importer are completely symmetric in implementation: For each encoding routine there is a matching and symmetric decoding routine. @@ -125,9 +127,15 @@ const exportInlined = true // default: true type exporter struct { out *bufio.Writer - pkgIndex map[*Pkg]int // pkg -> pkg index in order of appearance - typIndex map[*Type]int // type -> type index in order of appearance - funcList []*Func // in order of appearance + // object -> index maps, indexed in order of serialization + strIndex map[string]int + pkgIndex map[*Pkg]int + typIndex map[*Type]int + funcList []*Func + + // position encoding + prevFile string + prevLine int // debugging support written int // bytes written @@ -139,6 +147,7 @@ type exporter struct { func export(out *bufio.Writer, trace bool) int { p := exporter{ out: out, + strIndex: map[string]int{"": 0}, // empty string is mapped to 0 pkgIndex: make(map[*Pkg]int), typIndex: make(map[*Type]int), trace: trace, @@ -149,7 +158,7 @@ func export(out *bufio.Writer, trace bool) int { if debugFormat { format = 'd' } - p.byte(format) + p.rawByte(format) // --- generic export data --- @@ -419,6 +428,7 @@ func (p *exporter) obj(sym *Sym) { } p.tag(constTag) + p.pos(n) // TODO(gri) In inlined functions, constants are used directly // so they should never occur as re-exported objects. We may // not need the qualified name here. See also comment above. @@ -447,6 +457,7 @@ func (p *exporter) obj(sym *Sym) { if n.Type.Etype == TFUNC && n.Class == PFUNC { // function p.tag(funcTag) + p.pos(n) p.qualifiedName(sym) sig := sym.Def.Type @@ -471,6 +482,7 @@ func (p *exporter) obj(sym *Sym) { } else { // variable p.tag(varTag) + p.pos(n) p.qualifiedName(sym) p.typ(sym.Def.Type) } @@ -480,6 +492,26 @@ func (p *exporter) obj(sym *Sym) { } } +func (p *exporter) pos(n *Node) { + var file string + var line int + if n != nil { + file, line = Ctxt.LineHist.FileLine(int(n.Lineno)) + } + + if file == p.prevFile && line != p.prevLine { + // common case: write delta-encoded line number + p.int(line - p.prevLine) // != 0 + } else { + // uncommon case: filename changed, or line didn't change + p.int(0) + p.string(file) + p.int(line) + p.prevFile = file + } + p.prevLine = line +} + func isInlineable(n *Node) bool { if exportInlined && n != nil && n.Func != nil && len(n.Func.Inl.Slice()) != 0 { // when lazily typechecking inlined bodies, some re-exported ones may not have been typechecked yet. @@ -525,13 +557,17 @@ func (p *exporter) typ(t *Type) { if t.Orig == t { Fatalf("exporter: predeclared type missing from type map?") } - // TODO(gri) The assertion below seems incorrect (crashes during all.bash). - // we expect the respective definition to point to us + + // TODO(gri) The assertion below is incorrect (crashes during all.bash), + // likely because of symbol shadowing (we expect the respective definition + // to point to us). Determine the correct Def so we get correct position + // info. // if tsym.Def.Type != t { // Fatalf("exporter: type definition doesn't point to us?") // } p.tag(namedTag) + p.pos(tsym.Def) // TODO(gri) this may not be the correct node - fix and add tests p.qualifiedName(tsym) // write underlying type @@ -564,6 +600,7 @@ func (p *exporter) typ(t *Type) { Fatalf("invalid symbol name: %s (%v)", m.Sym.Name, m.Sym) } + p.pos(m.Sym.Def) p.fieldSym(m.Sym, false) sig := m.Type @@ -668,8 +705,12 @@ func (p *exporter) fieldList(t *Type) { } func (p *exporter) field(f *Field) { + p.pos(f.Sym.Def) p.fieldName(f.Sym, f) p.typ(f.Type) + // TODO(gri) Do we care that a non-present tag cannot be distinguished + // from a present but empty ta string? (reflect doesn't seem to make + // a difference). Investigate. p.note(f.Note) } @@ -697,6 +738,7 @@ func (p *exporter) methodList(t *Type) { } func (p *exporter) method(m *Field) { + p.pos(m.Sym.Def) p.fieldName(m.Sym, m) p.paramList(m.Type.Params(), false) p.paramList(m.Type.Results(), false) @@ -793,9 +835,6 @@ func (p *exporter) param(q *Field, n int, numbered bool) { // TODO(gri) This is compiler-specific (escape info). // Move into compiler-specific section eventually? // (Not having escape info causes tests to fail, e.g. runtime GCInfoTest) - // - // TODO(gri) The q.Note is much more verbose that necessary and - // adds significantly to export data size. FIX THIS. p.note(q.Note) } @@ -1497,9 +1536,17 @@ func (p *exporter) string(s string) { if p.trace { p.tracef("%q ", s) } - p.rawInt64(int64(len(s))) + // if we saw the string before, write its index (>= 0) + // (the empty string is mapped to 0) + if i, ok := p.strIndex[s]; ok { + p.rawInt64(int64(i)) + return + } + // otherwise, remember string and write its negative length and bytes + p.strIndex[s] = len(p.strIndex) + p.rawInt64(-int64(len(s))) for i := 0; i < len(s); i++ { - p.byte(s[i]) + p.rawByte(s[i]) } } @@ -1507,7 +1554,7 @@ func (p *exporter) string(s string) { // it easy for a reader to detect if it is "out of sync". Used only // if debugFormat is set. func (p *exporter) marker(m byte) { - p.byte(m) + p.rawByte(m) // Uncomment this for help tracking down the location // of an incorrect marker when running in debugFormat. // if p.trace { @@ -1521,12 +1568,12 @@ func (p *exporter) rawInt64(x int64) { var tmp [binary.MaxVarintLen64]byte n := binary.PutVarint(tmp[:], x) for i := 0; i < n; i++ { - p.byte(tmp[i]) + p.rawByte(tmp[i]) } } -// byte is the bottleneck interface to write to p.out. -// byte escapes b as follows (any encoding does that +// rawByte is the bottleneck interface to write to p.out. +// rawByte escapes b as follows (any encoding does that // hides '$'): // // '$' => '|' 'S' @@ -1534,7 +1581,8 @@ func (p *exporter) rawInt64(x int64) { // // Necessary so other tools can find the end of the // export data by searching for "$$". -func (p *exporter) byte(b byte) { +// rawByte should only be used by low-level encoders. +func (p *exporter) rawByte(b byte) { switch b { case '$': // write '$' as '|' 'S' diff --git a/src/cmd/compile/internal/gc/bimport.go b/src/cmd/compile/internal/gc/bimport.go index 223cc443aa..6654345ead 100644 --- a/src/cmd/compile/internal/gc/bimport.go +++ b/src/cmd/compile/internal/gc/bimport.go @@ -20,13 +20,18 @@ import ( // changes to bimport.go and bexport.go. type importer struct { - in *bufio.Reader - buf []byte // for reading strings - bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib + in *bufio.Reader + buf []byte // reused for reading strings - pkgList []*Pkg // in order of appearance - typList []*Type // in order of appearance - funcList []*Node // in order of appearance; nil entry means already declared + // object lists, in order of deserialization + strList []string + pkgList []*Pkg + typList []*Type + funcList []*Node // nil entry means already declared + + // position encoding + prevFile string + prevLine int // debugging support debugFormat bool @@ -35,11 +40,13 @@ type importer struct { // Import populates importpkg from the serialized package data. func Import(in *bufio.Reader) { - p := importer{in: in} - p.buf = p.bufarray[:] + p := importer{ + in: in, + strList: []string{""}, // empty string is mapped to 0 + } // read low-level encoding format - switch format := p.byte(); format { + switch format := p.rawByte(); format { case 'c': // compact format - nothing to do case 'd': @@ -221,6 +228,7 @@ func idealType(typ *Type) *Type { func (p *importer) obj(tag int) { switch tag { case constTag: + p.pos() sym := p.qualifiedName() typ := p.typ() val := p.value(typ) @@ -230,11 +238,13 @@ func (p *importer) obj(tag int) { p.typ() case varTag: + p.pos() sym := p.qualifiedName() typ := p.typ() importvar(sym, typ) case funcTag: + p.pos() sym := p.qualifiedName() params := p.paramList() result := p.paramList() @@ -268,6 +278,22 @@ func (p *importer) obj(tag int) { } } +func (p *importer) pos() { + file := p.prevFile + line := p.prevLine + + if delta := p.int(); delta != 0 { + line += delta + } else { + file = p.string() + line = p.int() + p.prevFile = file + } + p.prevLine = line + + // TODO(gri) register new position +} + func (p *importer) newtyp(etype EType) *Type { t := typ(etype) p.typList = append(p.typList, t) @@ -286,6 +312,7 @@ func (p *importer) typ() *Type { switch i { case namedTag: // parser.go:hidden_importsym + p.pos() tsym := p.qualifiedName() // parser.go:hidden_pkgtype @@ -311,6 +338,7 @@ func (p *importer) typ() *Type { for i := p.int(); i > 0; i-- { // parser.go:hidden_fndcl + p.pos() sym := p.fieldSym() recv := p.paramList() // TODO(gri) do we need a full param list for the receiver? @@ -409,20 +437,19 @@ func (p *importer) qualifiedName() *Sym { } // parser.go:hidden_structdcl_list -func (p *importer) fieldList() []*Node { - i := p.int() - if i == 0 { - return nil - } - n := make([]*Node, i) - for i := range n { - n[i] = p.field() +func (p *importer) fieldList() (fields []*Node) { + if n := p.int(); n > 0 { + fields = make([]*Node, n) + for i := range fields { + fields[i] = p.field() + } } - return n + return } // parser.go:hidden_structdcl func (p *importer) field() *Node { + p.pos() sym := p.fieldName() typ := p.typ() note := p.note() @@ -456,20 +483,19 @@ func (p *importer) note() (v Val) { } // parser.go:hidden_interfacedcl_list -func (p *importer) methodList() []*Node { - i := p.int() - if i == 0 { - return nil - } - n := make([]*Node, i) - for i := range n { - n[i] = p.method() +func (p *importer) methodList() (methods []*Node) { + if n := p.int(); n > 0 { + methods = make([]*Node, n) + for i := range methods { + methods[i] = p.method() + } } - return n + return } // parser.go:hidden_interfacedcl func (p *importer) method() *Node { + p.pos() sym := p.fieldName() params := p.paramList() result := p.paramList() @@ -1056,29 +1082,31 @@ func (p *importer) int64() int64 { } func (p *importer) string() string { - if p.debugFormat { + if debugFormat { p.marker('s') } - - // TODO(gri) should we intern strings here? - - if n := int(p.rawInt64()); n > 0 { - if cap(p.buf) < n { - p.buf = make([]byte, n) - } else { - p.buf = p.buf[:n] - } - for i := range p.buf { - p.buf[i] = p.byte() - } - return string(p.buf) + // if the string was seen before, i is its index (>= 0) + // (the empty string is at index 0) + i := p.rawInt64() + if i >= 0 { + return p.strList[i] } - - return "" + // otherwise, i is the negative string length (< 0) + if n := int(-i); n <= cap(p.buf) { + p.buf = p.buf[:n] + } else { + p.buf = make([]byte, n) + } + for i := range p.buf { + p.buf[i] = p.rawByte() + } + s := string(p.buf) + p.strList = append(p.strList, s) + return s } func (p *importer) marker(want byte) { - if got := p.byte(); got != want { + if got := p.rawByte(); got != want { Fatalf("importer: incorrect marker: got %c; want %c (pos = %d)", got, want, p.read) } @@ -1099,12 +1127,13 @@ func (p *importer) rawInt64() int64 { // needed for binary.ReadVarint in rawInt64 func (p *importer) ReadByte() (byte, error) { - return p.byte(), nil + return p.rawByte(), nil } -// byte is the bottleneck interface for reading from p.in. +// rawByte is the bottleneck interface for reading from p.in. // It unescapes '|' 'S' to '$' and '|' '|' to '|'. -func (p *importer) byte() byte { +// rawByte should only be used by low-level decoders. +func (p *importer) rawByte() byte { c, err := p.in.ReadByte() p.read++ if err != nil { diff --git a/src/go/internal/gcimporter/bimport.go b/src/go/internal/gcimporter/bimport.go index 7a7bc871f4..d75e533e97 100644 --- a/src/go/internal/gcimporter/bimport.go +++ b/src/go/internal/gcimporter/bimport.go @@ -19,13 +19,18 @@ type importer struct { imports map[string]*types.Package data []byte path string + buf []byte // for reading strings - buf []byte // for reading strings - bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib + // object lists + strList []string // in order of appearance + pkgList []*types.Package // in order of appearance + typList []types.Type // in order of appearance - pkgList []*types.Package - typList []types.Type + // position encoding + prevFile string + prevLine int + // debugging support debugFormat bool read int // bytes read } @@ -39,11 +44,11 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i imports: imports, data: data, path: path, + strList: []string{""}, // empty string is mapped to 0 } - p.buf = p.bufarray[:] // read low-level encoding format - switch format := p.byte(); format { + switch format := p.rawByte(); format { case 'c': // compact format - nothing to do case 'd': @@ -160,6 +165,7 @@ func (p *importer) declare(obj types.Object) { func (p *importer) obj(tag int) { switch tag { case constTag: + p.pos() pkg, name := p.qualifiedName() typ := p.typ(nil) val := p.value() @@ -169,11 +175,13 @@ func (p *importer) obj(tag int) { _ = p.typ(nil) case varTag: + p.pos() pkg, name := p.qualifiedName() typ := p.typ(nil) p.declare(types.NewVar(token.NoPos, pkg, name, typ)) case funcTag: + p.pos() pkg, name := p.qualifiedName() params, isddd := p.paramList() result, _ := p.paramList() @@ -185,6 +193,22 @@ func (p *importer) obj(tag int) { } } +func (p *importer) pos() { + file := p.prevFile + line := p.prevLine + + if delta := p.int(); delta != 0 { + line += delta + } else { + file = p.string() + line = p.int() + p.prevFile = file + } + p.prevLine = line + + // TODO(gri) register new position +} + func (p *importer) qualifiedName() (pkg *types.Package, name string) { name = p.string() pkg = p.pkg() @@ -220,6 +244,7 @@ func (p *importer) typ(parent *types.Package) types.Type { switch i { case namedTag: // read type object + p.pos() parent, name := p.qualifiedName() scope := parent.Scope() obj := scope.Lookup(name) @@ -252,6 +277,7 @@ func (p *importer) typ(parent *types.Package) types.Type { // read associated methods for i := p.int(); i > 0; i-- { // TODO(gri) replace this with something closer to fieldName + p.pos() name := p.string() if !exported(name) { p.pkg() @@ -293,14 +319,7 @@ func (p *importer) typ(parent *types.Package) types.Type { t := new(types.Struct) p.record(t) - n := p.int() - fields := make([]*types.Var, n) - tags := make([]string, n) - for i := range fields { - fields[i] = p.field(parent) - tags[i] = p.string() - } - *t = *types.NewStruct(fields, tags) + *t = *types.NewStruct(p.fieldList(parent)) return t case pointerTag: @@ -332,17 +351,7 @@ func (p *importer) typ(parent *types.Package) types.Type { panic("unexpected embedded interface") } - // read methods - methods := make([]*types.Func, p.int()) - for i := range methods { - pkg, name := p.fieldName(parent) - params, isddd := p.paramList() - result, _ := p.paramList() - sig := types.NewSignature(nil, params, result, isddd) - methods[i] = types.NewFunc(token.NoPos, pkg, name, sig) - } - - t := types.NewInterface(methods, nil) + t := types.NewInterface(p.methodList(parent), nil) p.typList[n] = t return t @@ -380,7 +389,20 @@ func (p *importer) typ(parent *types.Package) types.Type { } } +func (p *importer) fieldList(parent *types.Package) (fields []*types.Var, tags []string) { + if n := p.int(); n > 0 { + fields = make([]*types.Var, n) + tags = make([]string, n) + for i := range fields { + fields[i] = p.field(parent) + tags[i] = p.string() + } + } + return +} + func (p *importer) field(parent *types.Package) *types.Var { + p.pos() pkg, name := p.fieldName(parent) typ := p.typ(parent) @@ -402,6 +424,25 @@ func (p *importer) field(parent *types.Package) *types.Var { return types.NewField(token.NoPos, pkg, name, typ, anonymous) } +func (p *importer) methodList(parent *types.Package) (methods []*types.Func) { + if n := p.int(); n > 0 { + methods = make([]*types.Func, n) + for i := range methods { + methods[i] = p.method(parent) + } + } + return +} + +func (p *importer) method(parent *types.Package) *types.Func { + p.pos() + pkg, name := p.fieldName(parent) + params, isddd := p.paramList() + result, _ := p.paramList() + sig := types.NewSignature(nil, params, result, isddd) + return types.NewFunc(token.NoPos, pkg, name, sig) +} + func (p *importer) fieldName(parent *types.Package) (*types.Package, string) { pkg := parent if pkg == nil { @@ -567,24 +608,28 @@ func (p *importer) string() string { if p.debugFormat { p.marker('s') } - - if n := int(p.rawInt64()); n > 0 { - if cap(p.buf) < n { - p.buf = make([]byte, n) - } else { - p.buf = p.buf[:n] - } - for i := 0; i < n; i++ { - p.buf[i] = p.byte() - } - return string(p.buf) + // if the string was seen before, i is its index (>= 0) + // (the empty string is at index 0) + i := p.rawInt64() + if i >= 0 { + return p.strList[i] } - - return "" + // otherwise, i is the negative string length (< 0) + if n := int(-i); n <= cap(p.buf) { + p.buf = p.buf[:n] + } else { + p.buf = make([]byte, n) + } + for i := range p.buf { + p.buf[i] = p.rawByte() + } + s := string(p.buf) + p.strList = append(p.strList, s) + return s } func (p *importer) marker(want byte) { - if got := p.byte(); got != want { + if got := p.rawByte(); got != want { panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read)) } @@ -605,12 +650,13 @@ func (p *importer) rawInt64() int64 { // needed for binary.ReadVarint in rawInt64 func (p *importer) ReadByte() (byte, error) { - return p.byte(), nil + return p.rawByte(), nil } // byte is the bottleneck interface for reading p.data. // It unescapes '|' 'S' to '$' and '|' '|' to '|'. -func (p *importer) byte() byte { +// rawByte should only be used by low-level decoders. +func (p *importer) rawByte() byte { b := p.data[0] r := 1 if b == '|' {