]> Cypherpunks repositories - gostls13.git/commitdiff
liblink: fix incorrect hash collision in lookup
authorRuss Cox <rsc@golang.org>
Wed, 16 Apr 2014 15:53:14 +0000 (11:53 -0400)
committerRuss Cox <rsc@golang.org>
Wed, 16 Apr 2014 15:53:14 +0000 (11:53 -0400)
linklookup uses hash(name, v) as the hash table index but then
only compares name to find a symbol to return.
If hash(name, v1) == hash(name, v2) for v1 != v2, the lookup
for v2 will return the symbol with v1.

The input routines assume that each symbol is found only once,
and then each symbol is added to a linked list, with the list header
in the symbol. Adding a symbol to such a list multiple times
short-circuits the list the second time it is added, causing symbols
to be dropped.

The liblink rewrite introduced an elegant, if inefficient, handling
of duplicated symbols by creating a dummy symbol to read the
duplicate into. The dummy symbols are named .dup with
sequential version numbers. With many .dup symbols, eventually
there will be a conflict, causing a duplicate list add, causing elided
symbols, causing a crash when calling one of the elided symbols.

The bug is old (2011) but could not have manifested until the
liblink rewrite introduced this heavily duplicated symbol .dup.
(See History section below.)

1. Correct the lookup function.

2. Since we want all the .dup symbols to be different, there's no
point in inserting them into the table. Call linknewsym directly,
avoiding the lookup function entirely.

3. Since nothing can refer to the .dup symbols, do not bother
adding them to the list of functions (textp) at all.

4. In lieu of a unit test, introduce additional consistency checks to
detect adding a symbol to a list multiple times. This would have
caught the short-circuit more directly, and it will detect a variety
of double-use bugs, including the one arising from the bad lookup.

Fixes #7749.

History

On April 9, 2011, I submitted CL 4383047, making ld 25% faster.
Much of the focus was on the hash table lookup function, and
one of the changes was to remove the s->version == v comparison [1].

I don't know if this was a simple editing error or if I reasoned that
same name but different v would yield a different hash slot and
so the name test alone sufficed. It is tempting to claim the former,
but it was probably the latter.

Because the hash is an iterated multiply+add, the version ends up
adding v*3ⁿ to the hash, where n is the length of the name.
A collision would need x*3ⁿ ≡ y*3ⁿ (mod 2²⁴ mod 100003),
or equivalently x*3ⁿ ≡ x*3ⁿ + (y-x)*3ⁿ (mod 2²⁴ mod 100003),
so collisions will actually be periodic: versions x and y collide
when d = y-x satisfies d*3ⁿ ≡ 0 (mod 2²⁴ mod 100003).
Since we allocate version numbers sequentially, this is actually
about the best case one could imagine: the collision rate is
much lower than if the hash were more random.
http://play.golang.org/p/TScD41c_hA computes the collision
period for various name lengths.

The most common symbol in the new linker is .dup, and for n=4
the period is maximized: the 100004th symbol is the first collision.
Unfortunately, there are programs with more duplicated symbols
than that.

In Go 1.2 and before, duplicate symbols were handled without
creating a dummy symbol, so this particular case for generating
many duplicate symbols could not happen. Go does not use
versioned symbols. Only C does; each input file gives a different
version to its static declarations. There just aren't enough C files
for this to come up in that context.

So the bug is old but the realization of the bug is new.

[1] https://golang.org/cl/4383047/diff/5001/src/cmd/ld/lib.c

LGTM=minux.ma, iant, dave
R=golang-codereviews, minux.ma, bradfitz, iant, dave
CC=golang-codereviews, r
https://golang.org/cl/87910047

include/link.h
src/cmd/ld/data.c
src/cmd/ld/go.c
src/cmd/ld/ldelf.c
src/cmd/ld/ldmacho.c
src/cmd/ld/ldpe.c
src/liblink/objfile.c
src/liblink/sym.c

index 200a503cce31cef9ee04fc0990ef65259344bdfb..c4a6c3dea6e4ba5e1a997e8d49f4fe9cead36162 100644 (file)
@@ -132,6 +132,7 @@ struct      LSym
        uchar   leaf;   // arm only
        uchar   fnptr;  // arm only
        uchar   seenglobl;
+       uchar   onlist; // on the textp or datap lists
        int16   symid;  // for writing .5/.6/.8 files
        int32   dynid;
        int32   sig;
index 27779acc63c8f2aaa9b38ce6f21c8acfe710d3c2..e5739686ef61f9697906cf0cfa02d79d1dc91aef 100644 (file)
@@ -757,6 +757,9 @@ dodata(void)
                if(!s->reachable || s->special)
                        continue;
                if(STEXT < s->type && s->type < SXREF) {
+                       if(s->onlist)
+                               sysfatal("symbol %s listed multiple times", s->name);
+                       s->onlist = 1;
                        if(last == nil)
                                datap = s;
                        else
index a5a0fa5433172994529c8aed2b56efdd0110bd6d..9c296b740f05cf0bc9fa4c72af1e06b0186908e1 100644 (file)
@@ -618,6 +618,7 @@ deadcode(void)
        for(s = ctxt->textp; s != nil; s = s->next) {
                if(!s->reachable)
                        continue;
+               // NOTE: Removing s from old textp and adding to new, shorter textp.
                if(last == nil)
                        ctxt->textp = s;
                else
index f1091d57c3910797c821b3b6bf34050843a87ac5..75899c769f91b5295b3ea5ccafa5616f59d0d1e2 100644 (file)
@@ -631,12 +631,18 @@ ldelf(Biobuf *f, char *pkg, int64 len, char *pn)
                if(s->sub)
                        s->sub = listsort(s->sub, valuecmp, offsetof(LSym, sub));
                if(s->type == STEXT) {
+                       if(s->onlist)
+                               sysfatal("symbol %s listed multiple times", s->name);
+                       s->onlist = 1;
                        if(ctxt->etextp)
                                ctxt->etextp->next = s;
                        else
                                ctxt->textp = s;
                        ctxt->etextp = s;
                        for(s = s->sub; s != S; s = s->sub) {
+                               if(s->onlist)
+                                       sysfatal("symbol %s listed multiple times", s->name);
+                               s->onlist = 1;
                                ctxt->etextp->next = s;
                                ctxt->etextp = s;
                        }
index 7fd366a258ae116546d1a8c72a783d4d3a6dc2a7..00d0acd221ede33c046f3b9185c5388c6b0e75f1 100644 (file)
@@ -679,12 +679,18 @@ ldmacho(Biobuf *f, char *pkg, int64 len, char *pn)
                        }
                }
                if(s->type == STEXT) {
+                       if(s->onlist)
+                               sysfatal("symbol %s listed multiple times", s->name);
+                       s->onlist = 1;
                        if(ctxt->etextp)
                                ctxt->etextp->next = s;
                        else
                                ctxt->textp = s;
                        ctxt->etextp = s;
                        for(s1 = s->sub; s1 != S; s1 = s1->sub) {
+                               if(s1->onlist)
+                                       sysfatal("symbol %s listed multiple times", s1->name);
+                               s1->onlist = 1;
                                ctxt->etextp->next = s1;
                                ctxt->etextp = s1;
                        }
index 1f1a51fad1fff8f350984a2b0c9be11160b1a62a..e4e3580b09d4d8a5497b941b5fbd631d6a67fd8d 100644 (file)
@@ -393,12 +393,18 @@ ldpe(Biobuf *f, char *pkg, int64 len, char *pn)
                if(s->sub)
                        s->sub = listsort(s->sub, valuecmp, offsetof(LSym, sub));
                if(s->type == STEXT) {
+                       if(s->onlist)
+                               sysfatal("symbol %s listed multiple times", s->name);
+                       s->onlist = 1;
                        if(ctxt->etextp)
                                ctxt->etextp->next = s;
                        else
                                ctxt->textp = s;
                        ctxt->etextp = s;
                        for(s = s->sub; s != S; s = s->sub) {
+                               if(s->onlist)
+                                       sysfatal("symbol %s listed multiple times", s->name);
+                               s->onlist = 1;
                                ctxt->etextp->next = s;
                                ctxt->etextp = s;
                        }
index c7700cc25ce45e1536fe6a64c15492d7b42e900c..b52b29ca59ebd55856f6340022e21c85f22e7d41 100644 (file)
@@ -167,6 +167,9 @@ linkwriteobj(Link *ctxt, Biobuf *b)
                                s = p->from.sym;
                                if(s->seenglobl++)
                                        print("duplicate %P\n", p);
+                               if(s->onlist)
+                                       sysfatal("symbol %s listed multiple times", s->name);
+                               s->onlist = 1;
                                if(data == nil)
                                        data = s;
                                else
@@ -205,6 +208,9 @@ linkwriteobj(Link *ctxt, Biobuf *b)
                                }
                                if(s->text != nil)
                                        sysfatal("duplicate TEXT for %s", s->name);
+                               if(s->onlist)
+                                       sysfatal("symbol %s listed multiple times", s->name);
+                               s->onlist = 1;
                                if(text == nil)
                                        text = s;
                                else
@@ -518,7 +524,7 @@ readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
                        sysfatal("duplicate symbol %s (types %d and %d) in %s and %s", s->name, s->type, t, s->file, pn);
                if(s->np > 0) {
                        dup = s;
-                       s = linklookup(ctxt, ".dup", ndup++); // scratch
+                       s = linknewsym(ctxt, ".dup", ndup++); // scratch
                }
        }
        s->file = pkg;
@@ -595,11 +601,16 @@ readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
                for(i=0; i<n; i++)
                        pc->file[i] = rdsym(ctxt, f, pkg);
 
-               if(ctxt->etextp)
-                       ctxt->etextp->next = s;
-               else
-                       ctxt->textp = s;
-               ctxt->etextp = s;
+               if(dup == nil) {
+                       if(s->onlist)
+                               sysfatal("symbol %s listed multiple times", s->name);
+                       s->onlist = 1;
+                       if(ctxt->etextp)
+                               ctxt->etextp->next = s;
+                       else
+                               ctxt->textp = s;
+                       ctxt->etextp = s;
+               }
        }
 
        if(ctxt->debugasm) {
index ff51b3df89e76b9a662100b3bb4d0588402d7feb..cba50e9c7eb624d6b59dd388c6fbedaa1410eac1 100644 (file)
@@ -232,7 +232,7 @@ _lookup(Link *ctxt, char *symb, int v, int creat)
        h &= 0xffffff;
        h %= LINKHASH;
        for(s = ctxt->hash[h]; s != nil; s = s->hash)
-               if(strcmp(s->name, symb) == 0)
+               if(s->version == v && strcmp(s->name, symb) == 0)
                        return s;
        if(!creat)
                return nil;