Much of the bulk of Go binaries is the symbol tables,
which give a name to every C string, Go string,
and reflection type symbol. These names are not worth
much other than seeing what's where in a binary.
This CL deletes all those names from the symbol table,
instead aggregating the symbols into contiguous blocks
and giving them the names "string.*", "go.string.*", and "type.*".
Before:
$ 6nm $(which godoc.old) | sort | grep ' string\.' | tail -10
59eda4 D string."aa87ca22be8b05378eb1c71...
59ee08 D string."b3312fa7e23ee7e4988e056...
59ee6c D string."func(*token.FileSet, st...
59eed0 D string."func(io.Writer, []uint8...
59ef34 D string."func(*tls.Config, *tls....
59ef98 D string."func(*bool, **template....
59effc D string."method(p *printer.print...
59f060 D string."method(S *scanner.Scann...
59f12c D string."func(*struct { begin in...
59f194 D string."method(ka *tls.ecdheRSA...
$
After:
$ 6nm $(which godoc) | sort | grep ' string\.' | tail -10
5e6a30 D string.*
$
Those names in the "Before" are truncated for the CL.
In the real binary they are the complete string, up to
a certain length, or else a unique identifier.
The same applies to the type and go.string symbols.
Removing the names cuts godoc by more than half:
-rwxr-xr-x 1 rsc rsc 9153405 2011-03-07 23:19 godoc.old
-rwxr-xr-x 1 rsc rsc 4290071 2011-03-07 23:19 godoc
For what it's worth, only 80% of what's left gets loaded
into memory; the other 20% is dwarf debugging information
only ever accessed by gdb:
-rwxr-xr-x 1 rsc rsc 3397787 2011-03-07 23:19 godoc.nodwarf
R=r, cw
CC=golang-dev
https://golang.org/cl/4245072
for(h=0; h<NHASH; h++) {
for(s=hash[h]; s!=S; s=s->hash) {
+ if(s->hide)
+ continue;
switch(s->type) {
case SCONST:
case SRODATA:
uchar dynexport;
uchar leaf;
uchar stkcheck;
+ uchar hide;
int32 dynid;
int32 plt;
int32 got;
enum
{
- Sxxx,
-
- /* order here is order in output file */
- STEXT = 1,
- SRODATA,
- SELFDATA,
- SDATA,
- SBSS,
-
- SXREF,
- SFILE,
- SCONST,
- SDYNIMPORT,
-
- SSUB = 1<<8,
-
LFROM = 1<<0,
LTO = 1<<1,
LPOOL = 1<<2,
for(h=0; h<NHASH; h++) {
for(s=hash[h]; s!=S; s=s->hash) {
+ if(s->hide)
+ continue;
switch(s->type&~SSUB) {
case SCONST:
case SRODATA:
case SDATA:
case SELFDATA:
case SMACHOGOT:
+ case STYPE:
+ case SSTRING:
+ case SGOSTRING:
case SWINDOWS:
if(!s->reachable)
continue;
uchar dynexport;
uchar special;
uchar stkcheck;
+ uchar hide;
int32 dynid;
int32 sig;
int32 plt;
enum
{
- Sxxx,
-
- /* order here is order in output file */
- STEXT = 1,
- SELFDATA,
- SMACHOPLT,
- SRODATA,
- SDATA,
- SMACHOGOT,
- SWINDOWS,
- SBSS,
-
- SXREF,
- SMACHODYNSTR,
- SMACHODYNSYM,
- SMACHOINDIRECTPLT,
- SMACHOINDIRECTGOT,
- SFILE,
- SCONST,
- SDYNIMPORT,
- SSUB = 1<<8,
-
NHASH = 10007,
MINSIZ = 8,
STRINGSZ = 200,
for(h=0; h<NHASH; h++) {
for(s=hash[h]; s!=S; s=s->hash) {
+ if(s->hide)
+ continue;
switch(s->type&~SSUB) {
case SCONST:
case SRODATA:
uchar dynexport;
uchar special;
uchar stkcheck;
+ uchar hide;
int32 value;
int32 size;
int32 sig;
enum
{
- Sxxx,
-
- /* order here is order in output file */
- STEXT,
- SELFDATA,
- SMACHOPLT,
- SRODATA,
- SDATA,
- SMACHO, /* Mach-O __nl_symbol_ptr */
- SMACHOGOT,
- SWINDOWS,
- SBSS,
-
- SXREF,
- SMACHODYNSTR,
- SMACHODYNSYM,
- SMACHOINDIRECTPLT,
- SMACHOINDIRECTGOT,
- SFILE,
- SCONST,
- SDYNIMPORT,
-
- SSUB = 1<<8, /* sub-symbol, linked from parent via ->sub list */
-
NHASH = 10007,
MINSIZ = 4,
STRINGSZ = 200,
last = nil;
datap = nil;
+
for(h=0; h<NHASH; h++) {
for(s=hash[h]; s!=S; s=s->hash){
if(!s->reachable || s->special)
s = datap;
for(; s != nil && s->type < SDATA; s = s->next) {
s->type = SRODATA;
- t = rnd(s->size, 4);
+ t = rnd(s->size, PtrSize);
s->size = t;
s->value = datsize;
datsize += t;
if (strncmp(s, "type._.", 7) == 0)
return;
- if (strncmp(s, "type.", 5) == 0) {
+ if (strncmp(s, "type.", 5) == 0 && strcmp(s, "type.*") != 0) {
defgotype(sym);
return;
}
if(strncmp(s->name, "weak.", 5) == 0) {
s->special = 1; // do not lay out in data segment
s->reachable = 1;
+ s->hide = 1;
}
}
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-// Where symbol table data gets mapped into memory.
-#define SYMDATVA 0x99LL<<24
+enum
+{
+ Sxxx,
+
+ /* order here is order in output file */
+ STEXT,
+ SELFDATA,
+ SMACHOPLT,
+ STYPE,
+ SSTRING,
+ SGOSTRING,
+ SRODATA,
+ SDATA,
+ SMACHO, /* Mach-O __nl_symbol_ptr */
+ SMACHOGOT,
+ SWINDOWS,
+ SBSS,
+
+ SXREF,
+ SMACHODYNSTR,
+ SMACHODYNSYM,
+ SMACHOINDIRECTPLT,
+ SMACHOINDIRECTGOT,
+ SFILE,
+ SCONST,
+ SDYNIMPORT,
+
+ SSUB = 1<<8, /* sub-symbol, linked from parent via ->sub list */
+};
typedef struct Library Library;
struct Library
void
symtab(void)
{
+ int32 h;
+ Sym *s;
+
// Define these so that they'll get put into the symbol table.
// data.c:/^address will provide the actual values.
xdefine("text", STEXT, 0);
xdefine("end", SBSS, 0);
xdefine("epclntab", SRODATA, 0);
xdefine("esymtab", SRODATA, 0);
+
+ // pseudo-symbols to mark locations of type, string, and go string data.
+ s = lookup("type.*", 0);
+ s->type = STYPE;
+ s->size = 0;
+ s->reachable = 1;
+
+ s = lookup("string.*", 0);
+ s->type = SSTRING;
+ s->size = 0;
+ s->reachable = 1;
+
+ s = lookup("go.string.*", 0);
+ s->type = SGOSTRING;
+ s->size = 0;
+ s->reachable = 1;
symt = lookup("symtab", 0);
symt->type = SRODATA;
symt->size = 0;
symt->reachable = 1;
+
+ // assign specific types so that they sort together.
+ // within a type they sort by size, so the .* symbols
+ // just defined above will be first.
+ // hide the specific symbols.
+ for(h=0; h<NHASH; h++) {
+ for(s=hash[h]; s!=S; s=s->hash){
+ if(!s->reachable || s->special || s->type != SRODATA)
+ continue;
+ if(strncmp(s->name, "type.", 5) == 0) {
+ s->type = STYPE;
+ s->hide = 1;
+ }
+ if(strncmp(s->name, "string.", 7) == 0) {
+ s->type = SSTRING;
+ s->hide = 1;
+ }
+ if(strncmp(s->name, "go.string.", 10) == 0) {
+ s->type = SGOSTRING;
+ s->hide = 1;
+ }
+ }
+ }
genasmsym(putsymb);
}