REP MOVSQ and REP STOSQ have a really high startup overhead.
Use a Duff's device to do the repetition instead.
benchmark old ns/op new ns/op delta
BenchmarkClearFat32 7.20 1.60 -77.78%
BenchmarkCopyFat32 6.88 2.38 -65.41%
BenchmarkClearFat64 7.15 3.20 -55.24%
BenchmarkCopyFat64 6.88 3.44 -50.00%
BenchmarkClearFat128 9.53 5.34 -43.97%
BenchmarkCopyFat128 9.27 5.56 -40.02%
BenchmarkClearFat256 13.8 9.53 -30.94%
BenchmarkCopyFat256 13.5 10.3 -23.70%
BenchmarkClearFat512 22.3 18.0 -19.28%
BenchmarkCopyFat512 22.0 19.7 -10.45%
BenchmarkCopyFat1024 36.5 38.4 +5.21%
BenchmarkClearFat1024 35.1 35.0 -0.28%
TODO: use for stack frame zeroing
TODO: REP prefixes are still used for "reverse" copying when src/dst
regions overlap. Might be worth fixing.
LGTM=rsc
R=golang-codereviews, rsc
CC=golang-codereviews, r
https://golang.org/cl/81370046
Node nodl, nodr, nodsi, noddi, cx, oldcx, tmp;
vlong c, q, odst, osrc;
NodeList *l;
+ Prog *p;
if(debug['g']) {
print("\nsgen w=%lld\n", w);
gins(ACLD, N, N);
} else {
// normal direction
- if(q >= 4) {
+ if(q > 128) {
gconreg(movptr, q, D_CX);
gins(AREP, N, N); // repeat
gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+
+ } else if (q >= 4) {
+ p = gins(ADUFFCOPY, N, N);
+ p->to.type = D_ADDR;
+ p->to.sym = linksym(pkglookup("duffcopy", runtimepkg));
+ // 14 and 128 = magic constants: see ../../pkg/runtime/asm_amd64.s
+ p->to.offset = 14*(128-q);
} else
while(q > 0) {
gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+
savex(D_AX, &ax, &oldax, N, types[tptr]);
gconreg(AMOVL, 0, D_AX);
- if(q >= 4) {
+ if(q > 128) {
gconreg(movptr, q, D_CX);
gins(AREP, N, N); // repeat
gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
+ } else if(q >= 4) {
+ p = gins(ADUFFZERO, N, N);
+ p->to.type = D_ADDR;
+ p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
+ // 2 and 128 = magic constants: see ../../pkg/runtime/asm_amd64.s
+ p->to.offset = 2*(128-q);
} else
while(q > 0) {
gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
[AMOVSL]= {OK, DI|SI, DI|SI},
[AMOVSQ]= {OK, DI|SI, DI|SI},
[AMOVSW]= {OK, DI|SI, DI|SI},
+ [ADUFFCOPY]= {OK, DI|SI, DI|SI|CX},
[AMOVSD]= {SizeD | LeftRead | RightWrite | Move},
[AMOVSS]= {SizeF | LeftRead | RightWrite | Move},
[ASTOSL]= {OK, AX|DI, DI},
[ASTOSQ]= {OK, AX|DI, DI},
[ASTOSW]= {OK, AX|DI, DI},
+ [ADUFFZERO]= {OK, AX|DI, DI},
[ASUBB]= {SizeB | LeftRead | RightRdwr | SetCarry},
[ASUBL]= {SizeL | LeftRead | RightRdwr | SetCarry},
ACHECKNIL,
AVARDEF,
AVARKILL,
+ ADUFFCOPY,
+ ADUFFZERO,
ALAST
};
Node dst, src, tdst, tsrc;
int32 c, q, odst, osrc;
NodeList *l;
+ Prog *p;
if(debug['g']) {
print("\nsgen w=%lld\n", w);
} else {
gins(ACLD, N, N); // paranoia. TODO(rsc): remove?
// normal direction
- if(q >= 4) {
+ if(q > 128) {
gconreg(AMOVL, q, D_CX);
gins(AREP, N, N); // repeat
gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
+ } else if(q >= 4) {
+ p = gins(ADUFFCOPY, N, N);
+ p->to.type = D_ADDR;
+ p->to.sym = linksym(pkglookup("duffcopy", runtimepkg));
+ // 10 and 128 = magic constants: see ../../pkg/runtime/asm_386.s
+ p->to.offset = 10*(128-q);
} else
while(q > 0) {
gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
{
uint32 w, c, q;
Node n1;
+ Prog *p;
/* clear a fat object */
if(debug['g'])
agen(nl, &n1);
gconreg(AMOVL, 0, D_AX);
- if(q >= 4) {
+ if(q > 128) {
gconreg(AMOVL, q, D_CX);
gins(AREP, N, N); // repeat
gins(ASTOSL, N, N); // STOL AL,*(DI)+
+ } else if(q >= 4) {
+ p = gins(ADUFFZERO, N, N);
+ p->to.type = D_ADDR;
+ p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
+ // 1 and 128 = magic constants: see ../../pkg/runtime/asm_386.s
+ p->to.offset = 1*(128-q);
} else
while(q > 0) {
gins(ASTOSL, N, N); // STOL AL,*(DI)+
[AMOVSB]= {OK, DI|SI, DI|SI},
[AMOVSL]= {OK, DI|SI, DI|SI},
[AMOVSW]= {OK, DI|SI, DI|SI},
+ [ADUFFCOPY]= {OK, DI|SI, DI|SI|CX},
[AMOVSD]= {SizeD | LeftRead | RightWrite | Move},
[AMOVSS]= {SizeF | LeftRead | RightWrite | Move},
[ASTOSB]= {OK, AX|DI, DI},
[ASTOSL]= {OK, AX|DI, DI},
[ASTOSW]= {OK, AX|DI, DI},
+ [ADUFFZERO]= {OK, AX|DI, DI},
[ASUBB]= {SizeB | LeftRead | RightRdwr | SetCarry},
[ASUBL]= {SizeL | LeftRead | RightRdwr | SetCarry},
ACHECKNIL,
AVARDEF,
AVARKILL,
+ ADUFFCOPY,
+ ADUFFZERO,
ALAST
};
Ynone, Ybr, Zcall, 1,
0
};
+static uchar yduff[] =
+{
+ Ynone, Yi32, Zcall, 1,
+ 0
+};
static uchar yjmp[] =
{
Ynone, Yml, Zo_m64, 2,
{ APCDATA, ypcdata, Px, 0,0 },
{ ACHECKNIL },
{ AVARDEF },
+ { AVARKILL },
+ { ADUFFCOPY, yduff, Px, 0xe8 },
+ { ADUFFZERO, yduff, Px, 0xe8 },
{ AEND },
0
r = addrel(ctxt->cursym);
r->off = p->pc + ctxt->andptr - ctxt->and;
r->sym = p->to.sym;
+ r->add = p->to.offset;
r->type = D_PCREL;
r->siz = 4;
put4(ctxt, 0);
Ynone, Yi32, Zcallcon, 1,
0
};
+static uchar yduff[] =
+{
+ Ynone, Yi32, Zcall, 1,
+ 0
+};
static uchar yjmp[] =
{
Ynone, Yml, Zo_m, 2,
{ APCDATA, ypcdata, Px, 0,0 },
{ ACHECKNIL },
{ AVARDEF },
+ { AVARKILL },
+ { ADUFFCOPY, yduff, Px, 0xe8 },
+ { ADUFFZERO, yduff, Px, 0xe8 },
0
};
r->type = D_PCREL;
r->siz = 4;
r->sym = p->to.sym;
+ r->add = p->to.offset;
put4(ctxt, 0);
break;
SETEQ CX // 1 if alen == blen
LEAL -1(CX)(AX*2), AX // 1,0,-1 result
RET
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/8g/ggen.c:clearfat.
+// AX: zero
+// DI: ptr to memory to be zeroed
+// DI is updated as a side effect.
+TEXT runtime·duffzero(SB), NOSPLIT, $0-0
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ RET
+
+// A Duff's device for copying memory.
+// The compiler jumps to computed addresses within
+// this routine to copy chunks of memory. Source
+// and destination must not overlap. Do not
+// change this code without also changing the code
+// in ../../cmd/8g/cgen.c:sgen.
+// SI: ptr to source memory
+// DI: ptr to destination memory
+// SI and DI are updated as a side effect.
+
+// NOTE: this is equivalent to a sequence of MOVSL but
+// for some reason MOVSL is really slow.
+TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ RET
eqret:
MOVB AX, ret+48(FP)
RET
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/6g/ggen.c:clearfat.
+// AX: zero
+// DI: ptr to memory to be zeroed
+// DI is updated as a side effect.
+TEXT runtime·duffzero(SB), NOSPLIT, $0-0
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ RET
+
+// A Duff's device for copying memory.
+// The compiler jumps to computed addresses within
+// this routine to copy chunks of memory. Source
+// and destination must not overlap. Do not
+// change this code without also changing the code
+// in ../../cmd/6g/cgen.c:sgen.
+// SI: ptr to source memory
+// DI: ptr to destination memory
+// SI and DI are updated as a side effect.
+
+// NOTE: this is equivalent to a sequence of MOVSQ but
+// for some reason that is 3.5x slower than this code.
+// The STOSQ above seems fine, though.
+TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ RET
func BenchmarkMemclr256(b *testing.B) { bmMemclr(b, 256) }
func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) }
func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
+
+func BenchmarkClearFat32(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [32]byte
+ _ = x
+ }
+}
+func BenchmarkClearFat64(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [64]byte
+ _ = x
+ }
+}
+func BenchmarkClearFat128(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [128]byte
+ _ = x
+ }
+}
+func BenchmarkClearFat256(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [256]byte
+ _ = x
+ }
+}
+func BenchmarkClearFat512(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [512]byte
+ _ = x
+ }
+}
+func BenchmarkClearFat1024(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [1024]byte
+ _ = x
+ }
+}
+
+func BenchmarkCopyFat32(b *testing.B) {
+ var x [32]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat64(b *testing.B) {
+ var x [64]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat128(b *testing.B) {
+ var x [128]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat256(b *testing.B) {
+ var x [256]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat512(b *testing.B) {
+ var x [512]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat1024(b *testing.B) {
+ var x [1024]byte
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}