Pseudo-instructions MOVBS and MOVHS are used to clarify
the semantics of short integers vs. registers:
* 8-bit and 16-bit values in registers are assumed to always
be zero-extended or sign-extended depending on their type.
* MOVB is truncation or move of an already extended value
between registers.
* MOVBU enforces zero-extension at the destination (register).
* MOVBS enforces sign-extension at the destination (register).
And similarly for MOVH/MOVHS/MOVHU.
The linker is adapted to assemble MOVB and MOVH to an ordinary
mov. Also a peephole pass in 5g that aims at eliminating
redundant zero/sign extensions is improved.
encoding/binary:
benchmark old ns/op new ns/op delta
BenchmarkReadSlice1000Int32s 220387 217185 -1.45%
BenchmarkReadStruct 12839 12910 +0.55%
BenchmarkReadInts 5692 5534 -2.78%
BenchmarkWriteInts 6137 6016 -1.97%
BenchmarkPutUvarint32 257 241 -6.23%
BenchmarkPutUvarint64 812 754 -7.14%
benchmark old MB/s new MB/s speedup
BenchmarkReadSlice1000Int32s 18.15 18.42 1.01x
BenchmarkReadStruct 5.45 5.42 0.99x
BenchmarkReadInts 5.27 5.42 1.03x
BenchmarkWriteInts 4.89 4.99 1.02x
BenchmarkPutUvarint32 15.56 16.57 1.06x
BenchmarkPutUvarint64 9.85 10.60 1.08x
crypto/des:
benchmark old ns/op new ns/op delta
BenchmarkEncrypt 7002 5169 -26.18%
BenchmarkDecrypt 7015 5195 -25.94%
benchmark old MB/s new MB/s speedup
BenchmarkEncrypt 1.14 1.55 1.36x
BenchmarkDecrypt 1.14 1.54 1.35x
strconv:
benchmark old ns/op new ns/op delta
BenchmarkAtof64Decimal 457 385 -15.75%
BenchmarkAtof64Float 574 479 -16.55%
BenchmarkAtof64FloatExp 1035 906 -12.46%
BenchmarkAtof64Big 1793 1457 -18.74%
BenchmarkAtof64RandomBits 2267 2066 -8.87%
BenchmarkAtof64RandomFloats 1416 1194 -15.68%
BenchmarkAtof32Decimal 451 379 -15.96%
BenchmarkAtof32Float 547 435 -20.48%
BenchmarkAtof32FloatExp 1095 986 -9.95%
BenchmarkAtof32Random 1154 1006 -12.82%
BenchmarkAtoi 1415 1380 -2.47%
BenchmarkAtoiNeg 1414 1401 -0.92%
BenchmarkAtoi64 1744 1671 -4.19%
BenchmarkAtoi64Neg 1737 1662 -4.32%
Fixes #1837.
R=rsc, dave, bradfitz
CC=golang-dev
https://golang.org/cl/12424043
p1->as = AMOVW;
if(v->etype == TCHAR || v->etype == TUCHAR)
- p1->as = AMOVB;
+ p1->as = AMOVBS;
if(v->etype == TSHORT || v->etype == TUSHORT)
- p1->as = AMOVH;
+ p1->as = AMOVHS;
if(v->etype == TFLOAT)
p1->as = AMOVF;
if(v->etype == TDOUBLE)
cgen(nl, &n1);
}
gins(a, &n2, &n1);
+ // Normalize result for types smaller than word.
+ if(n->type->width < widthptr) {
+ switch(n->op) {
+ case OADD:
+ case OSUB:
+ case OMUL:
+ gins(optoas(OAS, n->type), &n1, &n1);
+ break;
+ }
+ }
gmove(&n1, res);
regfree(&n1);
if(n2.op != OLITERAL)
gshift(AMOVW, &n2, SHIFT_LL, v, &n1);
gshift(AORR, &n2, SHIFT_LR, w-v, &n1);
regfree(&n2);
+ // Ensure sign/zero-extended result.
+ gins(optoas(OAS, nl->type), &n1, &n1);
}
gmove(&n1, res);
regfree(&n1);
else // OLSH
gshift(AMOVW, &n1, SHIFT_LL, sc, &n1);
}
+ if(w < 32 && op == OLSH)
+ gins(optoas(OAS, nl->type), &n1, &n1);
gmove(&n1, res);
regfree(&n1);
return;
regfree(&n3);
patch(p3, pc);
+ // Left-shift of smaller word must be sign/zero-extended.
+ if(w < 32 && op == OLSH)
+ gins(optoas(OAS, nl->type), &n2, &n2);
gmove(&n2, res);
regfree(&n1);
}
while(c > 0) {
- p = gins(AMOVBU, &nz, &dst);
+ p = gins(AMOVB, &nz, &dst);
p->to.type = D_OREG;
p->to.offset = 1;
p->scond |= C_PBIT;
* integer copy and truncate
*/
case CASE(TINT8, TINT8): // same size
+ if(!ismem(f)) {
+ a = AMOVB;
+ break;
+ }
case CASE(TUINT8, TINT8):
case CASE(TINT16, TINT8): // truncate
case CASE(TUINT16, TINT8):
case CASE(TINT32, TINT8):
case CASE(TUINT32, TINT8):
- a = AMOVB;
+ a = AMOVBS;
break;
- case CASE(TINT8, TUINT8):
case CASE(TUINT8, TUINT8):
+ if(!ismem(f)) {
+ a = AMOVB;
+ break;
+ }
+ case CASE(TINT8, TUINT8):
case CASE(TINT16, TUINT8):
case CASE(TUINT16, TUINT8):
case CASE(TINT32, TUINT8):
case CASE(TINT64, TINT8): // truncate low word
case CASE(TUINT64, TINT8):
- a = AMOVB;
+ a = AMOVBS;
goto trunc64;
case CASE(TINT64, TUINT8):
goto trunc64;
case CASE(TINT16, TINT16): // same size
+ if(!ismem(f)) {
+ a = AMOVH;
+ break;
+ }
case CASE(TUINT16, TINT16):
case CASE(TINT32, TINT16): // truncate
case CASE(TUINT32, TINT16):
- a = AMOVH;
+ a = AMOVHS;
break;
- case CASE(TINT16, TUINT16):
case CASE(TUINT16, TUINT16):
+ if(!ismem(f)) {
+ a = AMOVH;
+ break;
+ }
+ case CASE(TINT16, TUINT16):
case CASE(TINT32, TUINT16):
case CASE(TUINT32, TUINT16):
a = AMOVHU;
case CASE(TINT64, TINT16): // truncate low word
case CASE(TUINT64, TINT16):
- a = AMOVH;
+ a = AMOVHS;
goto trunc64;
case CASE(TINT64, TUINT16):
case CASE(TINT8, TUINT16):
case CASE(TINT8, TINT32):
case CASE(TINT8, TUINT32):
- a = AMOVB;
+ a = AMOVBS;
goto rdst;
case CASE(TINT8, TINT64): // convert via int32
case CASE(TINT8, TUINT64):
case CASE(TINT16, TINT32): // sign extend int16
case CASE(TINT16, TUINT32):
- a = AMOVH;
+ a = AMOVHS;
goto rdst;
case CASE(TINT16, TINT64): // convert via int32
case CASE(TINT16, TUINT64):
ta = AMOVW;
switch(tt) {
case TINT8:
- ta = AMOVB;
+ ta = AMOVBS;
break;
case TUINT8:
ta = AMOVBU;
break;
case TINT16:
- ta = AMOVH;
+ ta = AMOVHS;
break;
case TUINT16:
ta = AMOVHU;
fa = AMOVW;
switch(ft) {
case TINT8:
- fa = AMOVB;
+ fa = AMOVBS;
break;
case TUINT8:
fa = AMOVBU;
break;
case TINT16:
- fa = AMOVH;
+ fa = AMOVHS;
break;
case TUINT16:
fa = AMOVHU;
m1.xoffset = 0;
m1.op = OINDREG;
m1.type = types[TUINT8];
- gins(AMOVBU, &m1, &m2);
+ gins(AMOVB, &m1, &m2);
regfree(&m2);
regfree(&m1);
}
break;
case CASE(OAS, TBOOL):
- case CASE(OAS, TINT8):
a = AMOVB;
break;
+ case CASE(OAS, TINT8):
+ a = AMOVBS;
+ break;
+
case CASE(OAS, TUINT8):
a = AMOVBU;
break;
case CASE(OAS, TINT16):
- a = AMOVH;
+ a = AMOVHS;
break;
case CASE(OAS, TUINT16):
#include "opt.h"
int xtramodes(Reg*, Adr*);
+int shortprop(Reg *r);
int shiftprop(Reg *r);
void constprop(Adr *c1, Adr *v1, Reg *r);
+
+Reg* findpre(Reg *r, Adr *v);
void predicate(void);
int copyau1(Prog *p, Adr *v);
int isdconst(Addr *a);
peep(void)
{
Reg *r, *r1, *r2;
- Prog *p, *p1;
+ Prog *p;
int t;
- p1 = nil;
/*
* complete R structure
*/
// }
break;
+ case AMOVB:
+ case AMOVH:
case AMOVW:
case AMOVF:
case AMOVD:
}
break;
+ case AMOVHS:
+ case AMOVHU:
+ case AMOVBS:
+ case AMOVBU:
+ if(p->from.type == D_REG) {
+ if(shortprop(r))
+ t++;
+ }
+ break;
+
#ifdef NOTDEF
if(p->scond == C_SCOND_NONE)
if(regtyp(&p->to))
if(t)
goto loop1;
-
for(r=firstr; r!=R; r=r->link) {
p = r->prog;
switch(p->as) {
p->reg = NREG;
}
break;
-
- case AMOVH:
- case AMOVHS:
- case AMOVHU:
- case AMOVB:
- case AMOVBS:
- case AMOVBU:
- /*
- * look for MOVB x,R; MOVB R,R
- */
- r1 = r->link;
- if(p->to.type != D_REG)
- break;
- if(r1 == R)
- break;
- p1 = r1->prog;
- if(p1->as != p->as)
- break;
- if(p1->from.type != D_REG || p1->from.reg != p->to.reg)
- break;
- if(p1->to.type != D_REG || p1->to.reg != p->to.reg)
- break;
- excise(r1);
- break;
}
}
case AMOVF:
case AMOVD:
+ case AMOVB:
+ case AMOVH:
case AMOVW:
if(p->to.type == v1->type)
if(p->to.reg == v1->reg)
}
}
+/*
+ * shortprop eliminates redundant zero/sign extensions.
+ *
+ * MOVBS x, R
+ * <no use R>
+ * MOVBS R, R'
+ *
+ * changed to
+ *
+ * MOVBS x, R
+ * ...
+ * MOVB R, R' (compiled to mov)
+ *
+ * MOVBS above can be a MOVBS, MOVBU, MOVHS or MOVHU.
+ *
+ * r is the Reg whose prog is the extension being examined; the
+ * register being extended is that prog's from operand.
+ *
+ * The extension is considered redundant in two cases:
+ *   1. the instruction returned by findpre for the source operand
+ *      has the same opcode as this one (the same extension was
+ *      already applied);
+ *   2. that instruction is a MOVW of a constant in the range
+ *      0..127, a value that neither sign- nor zero-extension
+ *      from 8 or 16 bits can change.
+ *
+ * In either case the opcode is rewritten in place: MOVBS/MOVBU
+ * become MOVB and MOVHS/MOVHU become MOVH.
+ *
+ * Returns 1 if the instruction was rewritten, 0 otherwise.
+ * With the P debug flag set, both instructions and the new
+ * opcode are printed.
+ *
+ * NOTE(review): findpre and isdconst are defined elsewhere;
+ * findpre presumably yields the preceding definition of the
+ * operand (R when none is found) and isdconst presumably tests
+ * for a constant operand. Confirm against their definitions.
+ */
+int
+shortprop(Reg *r)
+{
+ Prog *p, *p1;
+ Reg *r1;
+
+ p = r->prog;
+ r1 = findpre(r, &p->from);
+ if(r1 == R)
+ return 0;
+
+ p1 = r1->prog;
+ if(p1->as == p->as) {
+ // Two consecutive extensions with the same opcode: the second one adds nothing.
+ goto gotit;
+ }
+
+ if(p1->as == AMOVW && isdconst(&p1->from)
+ && p1->from.offset >= 0 && p1->from.offset < 128) {
+ // Loaded an immediate in 0..127: bit 7 and above are clear, so it is already both sign- and zero-extended.
+ goto gotit;
+ }
+
+ return 0;
+
+gotit:
+ if(debug['P'])
+ print("shortprop\n%P\n%P", p1, p);
+ switch(p->as) {
+ case AMOVBS:
+ case AMOVBU:
+ p->as = AMOVB;
+ break;
+ case AMOVHS:
+ case AMOVHU:
+ p->as = AMOVH;
+ break;
+ }
+ if(debug['P'])
+ print(" => %A\n", p->as);
+ return 1;
+}
+
/*
* ASLL x,y,w
* .. (not use w, not set x y w)
print("What is this %E\n", v->etype);
case TINT8:
- p1->as = AMOVB;
+ p1->as = AMOVBS;
break;
case TBOOL:
case TUINT8:
p1->as = AMOVBU;
break;
case TINT16:
- p1->as = AMOVH;
+ p1->as = AMOVHS;
break;
case TUINT16:
p1->as = AMOVHU;
case ACMP: return o | (0xa<<21) | (1<<20);
case ACMN: return o | (0xb<<21) | (1<<20);
case AORR: return o | (0xc<<21);
+ case AMOVB:
+ case AMOVH:
case AMOVW: return o | (0xd<<21);
case ABIC: return o | (0xe<<21);
case AMVN: return o | (0xf<<21);
{ AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM },
{ ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM },
- { AMOVB, C_REG, C_NONE, C_REG, 14, 8, 0 },
+ { AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0 },
{ AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0 },
{ AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0 },
- { AMOVH, C_REG, C_NONE, C_REG, 14, 8, 0 },
+ { AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0 },
{ AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0 },
{ AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0 },