From 2286471651c826280455e87dff0b2409dc668ce7 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 25 May 2011 10:18:49 -0400 Subject: [PATCH] 5g: alignment fixes Makes all.bash work after echo 4 >/proc/cpu/alignment, which means kill the process on an unaligned access. The default behavior on DreamPlug/GuruPlug/SheevaPlug is to simulate an ARMv3 and just let the unaligned accesses stop at the word boundary, resulting in all kinds of surprises. Fixes #1240. R=ken2 CC=golang-dev https://golang.org/cl/4551064 --- src/cmd/5g/cgen.c | 217 +++++++++++++++++---------------------------- src/cmd/gc/align.c | 12 +-- src/cmd/gc/gen.c | 4 + src/cmd/gc/obj.c | 1 + 4 files changed, 95 insertions(+), 139 deletions(-) diff --git a/src/cmd/5g/cgen.c b/src/cmd/5g/cgen.c index 4e5f7ebcdc..76e2707fa9 100644 --- a/src/cmd/5g/cgen.c +++ b/src/cmd/5g/cgen.c @@ -1190,7 +1190,8 @@ void sgen(Node *n, Node *res, int32 w) { Node dst, src, tmp, nend; - int32 c, q, odst, osrc; + int32 c, odst, osrc; + int dir, align, op; Prog *p, *ploop; if(debug['g']) { @@ -1200,17 +1201,39 @@ sgen(Node *n, Node *res, int32 w) } if(w == 0) return; - if(n->ullman >= UINF && res->ullman >= UINF) { - fatal("sgen UINF"); - } - if(w < 0) fatal("sgen copy %d", w); + if(n->ullman >= UINF && res->ullman >= UINF) + fatal("sgen UINF"); + if(n->type == T) + fatal("sgen: missing type"); + + // determine alignment. + // want to avoid unaligned access, so have to use + // smaller operations for less aligned types. + // for example moving [4]byte must use 4 MOVB not 1 MOVW. + align = n->type->align; + op = 0; + switch(align) { + default: + fatal("sgen: invalid alignment %d for %T", align, n->type); + case 1: + op = AMOVB; + break; + case 2: + op = AMOVH; + break; + case 4: + op = AMOVW; + break; + } + if(w%align) + fatal("sgen: unaligned size %d (align=%d) for %T", w, align, n->type); + c = w / align; // offset on the stack osrc = stkof(n); odst = stkof(res); - if(osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000)) { // osrc and odst both on stack, and at least one is in // an unknown position. Could generate code to test @@ -1221,12 +1244,15 @@ sgen(Node *n, Node *res, int32 w) sgen(&tmp, res, w); return; } - - if(osrc % 4 != 0 || odst %4 != 0) - fatal("sgen: non word(4) aligned offset src %d or dst %d", osrc, odst); + if(osrc%align != 0 || odst%align != 0) + fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align); + // if we are copying forward on the stack and + // the src and dst overlap, then reverse direction + dir = align; + if(osrc < odst && odst < osrc+w) + dir = -dir; regalloc(&dst, types[tptr], res); - if(n->ullman >= res->ullman) { agen(n, &dst); // temporarily use dst regalloc(&src, types[tptr], N); @@ -1240,141 +1266,64 @@ sgen(Node *n, Node *res, int32 w) regalloc(&tmp, types[TUINT32], N); - c = w % 4; // bytes - q = w / 4; // quads - - // if we are copying forward on the stack and - // the src and dst overlap, then reverse direction - if(osrc < odst && odst < osrc+w) { - if(c != 0) - fatal("sgen: reverse character copy not implemented"); - if(q >= 4) { - regalloc(&nend, types[TUINT32], N); - // set up end marker to 4 bytes before source - p = gins(AMOVW, &src, &nend); - p->from.type = D_CONST; - p->from.offset = -4; - - // move src and dest to the end of block - p = gins(AMOVW, &src, &src); - p->from.type = D_CONST; - p->from.offset = (q-1)*4; - - p = gins(AMOVW, &dst, &dst); - p->from.type = D_CONST; - p->from.offset = (q-1)*4; - - p = gins(AMOVW, &src, &tmp); - p->from.type = D_OREG; - p->from.offset = -4; - p->scond |= C_PBIT; - ploop = p; + // set up end marker + memset(&nend, 0, sizeof nend); + if(c >= 4) { + regalloc(&nend, types[TUINT32], N); - p = gins(AMOVW, &tmp, &dst); - p->to.type = D_OREG; - p->to.offset = -4; - p->scond |= C_PBIT; - - p = gins(ACMP, &src, N); - raddr(&nend, p); + p = gins(AMOVW, &src, &nend); + p->from.type = D_CONST; + if(dir < 0) + p->from.offset = dir; + else + p->from.offset = w; + } - patch(gbranch(ABNE, T), ploop); + // move src and dest to the end of block if necessary + if(dir < 0) { + p = gins(AMOVW, &src, &src); + p->from.type = D_CONST; + p->from.offset = w + dir; - regfree(&nend); - } else { - // move src and dest to the end of block - p = gins(AMOVW, &src, &src); - p->from.type = D_CONST; - p->from.offset = (q-1)*4; - - p = gins(AMOVW, &dst, &dst); - p->from.type = D_CONST; - p->from.offset = (q-1)*4; - - while(q > 0) { - p = gins(AMOVW, &src, &tmp); - p->from.type = D_OREG; - p->from.offset = -4; - p->scond |= C_PBIT; - - p = gins(AMOVW, &tmp, &dst); - p->to.type = D_OREG; - p->to.offset = -4; - p->scond |= C_PBIT; - - q--; - } - } + p = gins(AMOVW, &dst, &dst); + p->from.type = D_CONST; + p->from.offset = w + dir; + } + + // move + if(c >= 4) { + p = gins(op, &src, &tmp); + p->from.type = D_OREG; + p->from.offset = dir; + p->scond |= C_PBIT; + ploop = p; + + p = gins(op, &tmp, &dst); + p->to.type = D_OREG; + p->to.offset = dir; + p->scond |= C_PBIT; + + p = gins(ACMP, &src, N); + raddr(&nend, p); + + patch(gbranch(ABNE, T), ploop); + regfree(&nend); } else { - // normal direction - if(q >= 4) { - regalloc(&nend, types[TUINT32], N); - p = gins(AMOVW, &src, &nend); - p->from.type = D_CONST; - p->from.offset = q*4; - - p = gins(AMOVW, &src, &tmp); + while(c-- > 0) { + p = gins(op, &src, &tmp); p->from.type = D_OREG; - p->from.offset = 4; + p->from.offset = dir; p->scond |= C_PBIT; ploop = p; - - p = gins(AMOVW, &tmp, &dst); + + p = gins(op, &tmp, &dst); p->to.type = D_OREG; - p->to.offset = 4; + p->to.offset = dir; p->scond |= C_PBIT; - - p = gins(ACMP, &src, N); - raddr(&nend, p); - - patch(gbranch(ABNE, T), ploop); - - regfree(&nend); - } else - while(q > 0) { - p = gins(AMOVW, &src, &tmp); - p->from.type = D_OREG; - p->from.offset = 4; - p->scond |= C_PBIT; - - p = gins(AMOVW, &tmp, &dst); - p->to.type = D_OREG; - p->to.offset = 4; - p->scond |= C_PBIT; - - q--; - } - - if (c != 0) { - // MOVW (src), tmp - p = gins(AMOVW, &src, &tmp); - p->from.type = D_OREG; - - // MOVW tmp<<((4-c)*8),src - gshift(AMOVW, &tmp, SHIFT_LL, ((4-c)*8), &src); - - // MOVW src>>((4-c)*8),src - gshift(AMOVW, &src, SHIFT_LR, ((4-c)*8), &src); - - // MOVW (dst), tmp - p = gins(AMOVW, &dst, &tmp); - p->from.type = D_OREG; - - // MOVW tmp>>(c*8),tmp - gshift(AMOVW, &tmp, SHIFT_LR, (c*8), &tmp); - - // MOVW tmp<<(c*8),tmp - gshift(AMOVW, &tmp, SHIFT_LL, c*8, &tmp); - - // ORR src, tmp - gins(AORR, &src, &tmp); - - // MOVW tmp, (dst) - p = gins(AMOVW, &tmp, &dst); - p->to.type = D_OREG; } } - regfree(&dst); + + regfree(&dst); regfree(&src); regfree(&tmp); } diff --git a/src/cmd/gc/align.c b/src/cmd/gc/align.c index a8454bf130..7fcac48339 100644 --- a/src/cmd/gc/align.c +++ b/src/cmd/gc/align.c @@ -234,9 +234,11 @@ dowidth(Type *t) if(t->bound > cap) yyerror("type %lT larger than address space", t); w = t->bound * t->type->width; - if(w == 0) - w = 1; t->align = t->type->align; + if(w == 0) { + w = 1; + t->align = 1; + } } else if(t->bound == -1) { w = sizeof_Array; @@ -253,10 +255,10 @@ dowidth(Type *t) if(t->funarg) fatal("dowidth fn struct %T", t); w = widstruct(t, 0, 1); - if(w == 0) + if(w == 0) { w = 1; - //if(t->align < widthptr) - // warn("align %d: %T\n", t->align, t); + t->align = 1; + } break; case TFUNC: diff --git a/src/cmd/gc/gen.c b/src/cmd/gc/gen.c index 8ad6c437de..0b6f5bbd83 100644 --- a/src/cmd/gc/gen.c +++ b/src/cmd/gc/gen.c @@ -59,6 +59,8 @@ allocparams(void) fatal("bad width"); stksize += w; stksize = rnd(stksize, n->type->align); + if(thechar == '5') + stksize = rnd(stksize, widthptr); n->xoffset = -stksize; } lineno = lno; @@ -698,6 +700,8 @@ tempname(Node *n, Type *t) w = t->width; stksize += w; stksize = rnd(stksize, t->align); + if(thechar == '5') + stksize = rnd(stksize, widthptr); n->xoffset = -stksize; n->pun = anyregalloc(); } diff --git a/src/cmd/gc/obj.c b/src/cmd/gc/obj.c index 9f4b7b318d..f34fc76c85 100644 --- a/src/cmd/gc/obj.c +++ b/src/cmd/gc/obj.c @@ -284,6 +284,7 @@ stringsym(char *s, int len) off = dsname(sym, off, s+n, m); } off = duint8(sym, off, 0); // terminating NUL for runtime + off = (off+widthptr-1)&~(widthptr-1); // round to pointer alignment ggloblsym(sym, off, 1); text(); -- 2.50.0