]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/6g: faster memmove/memset-like code using unaligned load/stores.
authorRémy Oudompheng <oudomphe@phare.normalesup.org>
Fri, 7 Feb 2014 22:58:21 +0000 (23:58 +0100)
committerRémy Oudompheng <oudomphe@phare.normalesup.org>
Fri, 7 Feb 2014 22:58:21 +0000 (23:58 +0100)
This changes makes sgen and clearfat use unaligned instructions for
the trailing bytes, like the runtime memmove does, resulting in faster
code when manipulating types whose size is not a multiple of 8.

LGTM=khr
R=khr, iant, rsc
CC=golang-codereviews
https://golang.org/cl/51740044

src/cmd/6g/cgen.c
src/cmd/6g/ggen.c

index da16071a8d955ddc6cbaae0bf3de177e87c9f608..76ece93b04cfa157f0258c8c36b12dcbdbb4f214 100644 (file)
@@ -1436,14 +1436,33 @@ sgen(Node *n, Node *ns, int64 w)
                        gins(AMOVSQ, N, N);     // MOVQ *(SI)+,*(DI)+
                        q--;
                }
-
-               if(c >= 4) {
-                       gins(AMOVSL, N, N);     // MOVL *(SI)+,*(DI)+
-                       c -= 4;
-               }
-               while(c > 0) {
-                       gins(AMOVSB, N, N);     // MOVB *(SI)+,*(DI)+
-                       c--;
+               // copy the remaining c bytes
+               if(w < 4 || c <= 1 || (odst < osrc && osrc < odst+w)) {
+                       while(c > 0) {
+                               gins(AMOVSB, N, N);     // MOVB *(SI)+,*(DI)+
+                               c--;
+                       }
+               } else if(w < 8 || c <= 4) {
+                       nodsi.op = OINDREG;
+                       noddi.op = OINDREG;
+                       nodsi.type = types[TINT32];
+                       noddi.type = types[TINT32];
+                       if(c > 4) {
+                               nodsi.xoffset = 0;
+                               noddi.xoffset = 0;
+                               gmove(&nodsi, &noddi);
+                       }
+                       nodsi.xoffset = c-4;
+                       noddi.xoffset = c-4;
+                       gmove(&nodsi, &noddi);
+               } else {
+                       nodsi.op = OINDREG;
+                       noddi.op = OINDREG;
+                       nodsi.type = types[TINT64];
+                       noddi.type = types[TINT64];
+                       nodsi.xoffset = c-8;
+                       noddi.xoffset = c-8;
+                       gmove(&nodsi, &noddi);
                }
        }
 
index 2bdb12bdd08be825c4e343cfb0d922f240d09a9a..1b8bf7e400e4b17670bde42757e0e1b9b383f4ea 100644 (file)
@@ -1016,7 +1016,8 @@ void
 clearfat(Node *nl)
 {
        int64 w, c, q;
-       Node n1, oldn1, ax, oldax;
+       Node n1, oldn1, ax, oldax, di, z;
+       Prog *p;
 
        /* clear a fat object */
        if(debug['g'])
@@ -1048,10 +1049,23 @@ clearfat(Node *nl)
                q--;
        }
 
-       if(c >= 4) {
-               gconreg(AMOVQ, c, D_CX);
-               gins(AREP, N, N);       // repeat
-               gins(ASTOSB, N, N);     // STOB AL,*(DI)+
+       z = ax;
+       di = n1;
+       if(w >= 8 && c >= 4) {
+               di.op = OINDREG;
+               di.type = z.type = types[TINT64];
+               p = gins(AMOVQ, &z, &di);
+               p->to.scale = 1;
+               p->to.offset = c-8;
+       } else if(c >= 4) {
+               di.op = OINDREG;
+               di.type = z.type = types[TINT32];
+               p = gins(AMOVL, &z, &di);
+               if(c > 4) {
+                       p = gins(AMOVL, &z, &di);
+                       p->to.scale = 1;
+                       p->to.offset = c-4;
+               }
        } else
        while(c > 0) {
                gins(ASTOSB, N, N);     // STOB AL,*(DI)+