{
thechar = '6',
PtrSize = 8,
+
+ // Loop alignment constants:
+ // want to align loop entry to LoopAlign-byte boundary,
+ // and willing to insert at most MaxLoopPad bytes of NOP to do so.
+ // We define a loop entry as the target of a backward jump.
+ //
+ // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
+ // and it aligns all jump targets, not just backward jump targets.
+ //
+ // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
+ // is very slight but negative, so the alignment is disabled by
+ // setting MaxLoopPad = 0. The code is here for reference and
+ // for future experiments.
+ //
+ LoopAlign = 16,
+ MaxLoopPad = 0,
+
FuncAlign = 16
};
static int asmode;
static vlong vaddr(Adr*, Reloc*);
+// single-instruction no-ops of various lengths.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = {
+ {0x90},
+ {0x66, 0x90},
+ {0x0F, 0x1F, 0x00},
+ {0x0F, 0x1F, 0x40, 0x00},
+ {0x0F, 0x1F, 0x44, 0x00, 0x00},
+ {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+ {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+ {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+};
+
+static void
+fillnop(uchar *p, int n)
+{
+ int m;
+
+ while(n > 0) {
+ m = n;
+ if(m > nelem(nop))
+ m = nelem(nop);
+ memmove(p, nop[m-1], m);
+ p += m;
+ n -= m;
+ }
+}
+
void
span1(Sym *s)
{
for(p = s->text; p != P; p = p->link) {
p->back = 2; // use short branches first time through
- if((q = p->pcond) != P && (q->back & 2))
+ if((q = p->pcond) != P && (q->back & 2)) {
p->back |= 1; // backward jump
+ q->back |= 4; // loop head
+ }
if(p->as == AADJSP) {
p->to.type = D_SP;
s->np = 0;
c = 0;
for(p = s->text; p != P; p = p->link) {
+ if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+ // pad with NOPs
+ v = -c&(LoopAlign-1);
+ if(v <= MaxLoopPad) {
+ symgrow(s, c+v);
+ fillnop(s->p+c, v);
+ c += v;
+ }
+ }
+
p->pc = c;
// process forward jumps to p
[0x15] = { RM,0, "UNPCKH%s %x,%X" },
[0x16] = { RM,0, "MOV[L]H%s %x,%X" }, /* TO DO: L if source is XMM */
[0x17] = { RM,0, "MOVH%s %X,%x" },
+[0x1F] = { RM,0, "NOP%S %e" },
[0x20] = { RMR,0, "MOVL %C,%e" },
[0x21] = { RMR,0, "MOVL %D,%e" },
[0x22] = { RMR,0, "MOVL %e,%C" },