Cypherpunks repositories - gostls13.git/commitdiff
cmd/6l: loop alignment, disabled
author Russ Cox <rsc@golang.org>
Fri, 1 Jun 2012 14:23:15 +0000 (10:23 -0400)
committer Russ Cox <rsc@golang.org>
Fri, 1 Jun 2012 14:23:15 +0000 (10:23 -0400)
Saving the code in case we improve things enough that
it matters later, but at least right now it is not worth doing.

R=ken2
CC=golang-dev
https://golang.org/cl/6248071

src/cmd/6l/l.h
src/cmd/6l/span.c
src/libmach/8db.c

index b1611e016ab7b875dce282253b0019c5bd3150ea..4e271c31fefe43456389bef9746ec6db27f6368d 100644 (file)
@@ -41,6 +41,23 @@ enum
 {
        thechar = '6',
        PtrSize = 8,
+       
+       // Loop alignment constants:
+       // want to align loop entry to LoopAlign-byte boundary,
+       // and willing to insert at most MaxLoopPad bytes of NOP to do so.
+       // We define a loop entry as the target of a backward jump.
+       //
+       // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
+       // and it aligns all jump targets, not just backward jump targets.
+       //
+       // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
+       // is very slight but negative, so the alignment is disabled by
+       // setting MaxLoopPad = 0. The code is here for reference and
+       // for future experiments.
+       // 
+       LoopAlign = 16,
+       MaxLoopPad = 0,
+
        FuncAlign = 16
 };
 
index 28eb38f404b540fd51787037ba052f60203d3d1a..60916c0412f29ced4ec4d847476fd4199a7628f9 100644 (file)
@@ -37,6 +37,37 @@ static int   rexflag;
 static int     asmode;
 static vlong   vaddr(Adr*, Reloc*);
 
+// single-instruction no-ops of various lengths.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = {  // nop[i] holds one no-op instruction that is i+1 bytes long; the unused tail of each 16-byte row is zero
+       {0x90},  // 1 byte
+       {0x66, 0x90},  // 2 bytes
+       {0x0F, 0x1F, 0x00},  // 3 bytes
+       {0x0F, 0x1F, 0x40, 0x00},  // 4 bytes
+       {0x0F, 0x1F, 0x44, 0x00, 0x00},  // 5 bytes
+       {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},  // 6 bytes
+       {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},  // 7 bytes
+       {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},  // 8 bytes
+       {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},  // 9 bytes
+       {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},  // 10 bytes (longest entry)
+};
+
+// fillnop overwrites the n bytes starting at p with no-op instructions
+// from the nop table. Each step emits the longest table entry that
+// still fits in the remaining space, so the padding uses as few
+// instructions as the table allows.
+static void
+fillnop(uchar *p, int n)
+{
+       int len;
+
+       for(; n > 0; n -= len, p += len) {
+               // start from the longest entry, shrink to what is left
+               len = nelem(nop);
+               if(n < len)
+                       len = n;
+               // row len-1 of the table is the len-byte no-op
+               memmove(p, nop[len-1], len);
+       }
+}
+
 void
 span1(Sym *s)
 {
@@ -52,8 +83,10 @@ span1(Sym *s)
 
        for(p = s->text; p != P; p = p->link) {
                p->back = 2;    // use short branches first time through
-               if((q = p->pcond) != P && (q->back & 2))
+               if((q = p->pcond) != P && (q->back & 2)) {
                        p->back |= 1;   // backward jump
+                       q->back |= 4;   // loop head
+               }
 
                if(p->as == AADJSP) {
                        p->to.type = D_SP;
@@ -78,6 +111,16 @@ span1(Sym *s)
                s->np = 0;
                c = 0;
                for(p = s->text; p != P; p = p->link) {
+                       if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+                               // pad with NOPs
+                               v = -c&(LoopAlign-1);
+                               if(v <= MaxLoopPad) {
+                                       symgrow(s, c+v);
+                                       fillnop(s->p+c, v);
+                                       c += v;
+                               }
+                       }
+
                        p->pc = c;
 
                        // process forward jumps to p
index ce1b4ddd7611ee3e28886f0a937b72089848a575..9ef02c4289cb1e3f627ca4232386579c821baeb9 100644 (file)
@@ -622,6 +622,7 @@ static Optable optab0F[256]=
 [0x15] =       { RM,0,         "UNPCKH%s       %x,%X" },
 [0x16] =       { RM,0,         "MOV[L]H%s      %x,%X" },       /* TO DO: L if source is XMM */
 [0x17] =       { RM,0,         "MOVH%s %X,%x" },
+[0x1F] =       { RM,0,         "NOP%S  %e" },
 [0x20] =       { RMR,0,                "MOVL   %C,%e" },
 [0x21] =       { RMR,0,                "MOVL   %D,%e" },
 [0x22] =       { RMR,0,                "MOVL   %e,%C" },