uint32 BLOAD(Reg*);
 uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
 */
 
 // A Reg is a wrapper around a single Prog (one instruction) that holds
 uint32 allreg(uint32, Rgn*);
 void   paint1(Reg*, int);
 uint32 paint2(Reg*, int);
-void   paint3(Reg*, int, int32, int);
+void   paint3(Reg*, int, uint32, int);
 void   addreg(Adr*, int);
 void   dumpit(char *str, Flow *r0, int);
 
 void   excise(Flow*);
 int    copyu(Prog*, Adr*, Adr*);
 
-int32  RtoB(int);
-int32  FtoB(int);
-int    BtoR(int32);
-int    BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int    BtoR(uint32);
+int    BtoF(uint32);
 
 /*
  * prog.c
 
 #include "opt.h"
 
 #define        NREGVAR 32
-#define        REGBITS ((uint32)0xffffffff)
+#define        REGBITS ((uint64)0xffffffffull)
 /*c2go enum {
        NREGVAR = 32,
        REGBITS = 0xffffffff,
                i = bnum(bit);
                node = var[i].node;
                n = var[i].name;
-               bit.b[i/32] &= ~(1L<<(i%32));
+               biclr(&bit, i);
 
                // disable all pieces of that variable
                for(i=0; i<nvar; i++) {
                        if(debug['R'] > 1)
                                print("\n");
                        paint1(r, i);
-                       bit.b[i/32] &= ~(1L<<(i%32));
+                       biclr(&bit, i);
                        if(change <= 0) {
                                if(debug['R'])
                                        print("%L $%d: %Q\n",
                        break;
                for(v=n->opt; v!=nil; v=v->nextinnode) {
                        bn = v - var;
-                       r1->act.b[bn/32] |= 1L << (bn%32);
+                       biset(&r1->act, bn);
                }
                if(r1->f.prog->as == ABL)
                        break;
                                        ~(r->calahead.b[z] & addrs.b[z]);
                        while(bany(&bit)) {
                                i = bnum(bit);
-                               bit.b[i/32] &= ~(1L << (i%32));
+                               biclr(&bit, i);
                        }
                }
        }
                        for(z=0; z<BITS; z++) {
                                if(cal.b[z] == 0)
                                        continue;
-                               for(i=0; i<32; i++) {
-                                       if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+                               for(i=0; i<64; i++) {
+                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
                                                continue;
-                                       v = var+z*32+i;
+                                       v = var+z*64+i;
                                        if(v->node->opt == nil) // v represents fixed register, not Go variable
                                                continue;
 
                                        // This will set the bits at most twice, keeping the overall loop linear.
                                        v1 = v->node->opt;
                                        j = v1 - var;
-                                       if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+                                       if(v == v1 || !btest(&cal, j)) {
                                                for(; v1 != nil; v1 = v1->nextinnode) {
                                                        j = v1 - var;
-                                                       cal.b[j/32] |= 1<<(j&31);
+                                                       biset(&cal, j);
                                                }
                                        }
                                }
        Reg *r1;
        Prog *p;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L<<(bn%32);
+       z = bn/64;
+       bb = 1LL<<(bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
 {
        Reg *r1;
        int z;
-       uint32 bb, vreg;
+       uint64 bb, vreg;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        vreg = regbits;
        if(!(r->act.b[z] & bb))
                return vreg;
 }
 
 void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
 {
        Reg *r1;
        Prog *p;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
  *     10      R10
  *     12  R12
  */
-int32
+uint32
 RtoB(int r)
 {
        if(r >= REGTMP-2 && r != 12)    // excluded R9 and R10 for m and g, but not R12
 }
 
 int
-BtoR(int32 b)
+BtoR(uint32 b)
 {
        b &= 0x11fcL;   // excluded R9 and R10 for m and g, but not R12
        if(b == 0)
  *     ...     ...
  *     31      F15
  */
-int32
+uint32
 FtoB(int f)
 {
 
 }
 
 int
-BtoF(int32 b)
+BtoF(uint32 b)
 {
 
        b &= 0xfffc0000L;
 
 
 uint32 BLOAD(Reg*);
 uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
 */
 
 // A Reg is a wrapper around a single Prog (one instruction) that holds
 uint32 allreg(uint32, Rgn*);
 void   paint1(Reg*, int);
 uint32 paint2(Reg*, int);
-void   paint3(Reg*, int, int32, int);
+void   paint3(Reg*, int, uint32, int);
 void   addreg(Adr*, int);
 void   dumpone(Flow*, int);
 void   dumpit(char*, Flow*, int);
 void   excise(Flow*);
 int    copyu(Prog*, Adr*, Adr*);
 
-int32  RtoB(int);
-int32  FtoB(int);
-int    BtoR(int32);
-int    BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int    BtoR(uint32);
+int    BtoF(uint32);
 
 /*
  * prog.c
 
 #include "opt.h"
 
 #define        NREGVAR 32      /* 16 general + 16 floating */
-#define        REGBITS ((uint32)0xffffffff)
+#define        REGBITS ((uint64)0xffffffffull)
 /*c2go enum {
        NREGVAR = 32,
        REGBITS = 0xffffffff,
                i = bnum(bit);
                node = var[i].node;
                n = var[i].name;
-               bit.b[i/32] &= ~(1L<<(i%32));
+               biclr(&bit, i);
 
                // disable all pieces of that variable
                for(i=0; i<nvar; i++) {
                        rgp->varno = i;
                        change = 0;
                        paint1(r, i);
-                       bit.b[i/32] &= ~(1L<<(i%32));
+                       biclr(&bit, i);
                        if(change <= 0)
                                continue;
                        rgp->cost = change;
                        break;
                for(v=n->opt; v!=nil; v=v->nextinnode) {
                        bn = v - var;
-                       r1->act.b[bn/32] |= 1L << (bn%32);
+                       biset(&r1->act, bn);
                }
                if(r1->f.prog->as == ACALL)
                        break;
                        for(z=0; z<BITS; z++) {
                                if(cal.b[z] == 0)
                                        continue;
-                               for(i=0; i<32; i++) {
-                                       if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+                               for(i=0; i<64; i++) {
+                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
                                                continue;
-                                       v = var+z*32+i;
+                                       v = var+z*64+i;
                                        if(v->node->opt == nil) // v represents fixed register, not Go variable
                                                continue;
 
                                        // This will set the bits at most twice, keeping the overall loop linear.
                                        v1 = v->node->opt;
                                        j = v1 - var;
-                                       if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+                                       if(v == v1 || !btest(&cal, j)) {
                                                for(; v1 != nil; v1 = v1->nextinnode) {
                                                        j = v1 - var;
-                                                       cal.b[j/32] |= 1UL<<(j&31);
+                                                       biset(&cal, j);
                                                }
                                        }
                                }
 {
        Reg *r1;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L<<(bn%32);
+       z = bn/64;
+       bb = 1LL<<(bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
 {
        Reg *r1;
        int z;
-       uint32 bb, vreg, x;
+       uint64 bb, vreg, x;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        vreg = regbits;
        if(!(r->act.b[z] & bb))
                return vreg;
 }
 
 void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
 {
        Reg *r1;
        Prog *p;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
        ostats.ncvtreg++;
 }
 
-int32
+uint32
 RtoB(int r)
 {
 
 }
 
 int
-BtoR(int32 b)
+BtoR(uint32 b)
 {
        b &= 0xffffL;
        if(nacl)
  *     ...
  *     31      X15
  */
-int32
+uint32
 FtoB(int f)
 {
        if(f < D_X0 || f > D_X15)
 }
 
 int
-BtoF(int32 b)
+BtoF(uint32 b)
 {
 
        b &= 0xFFFF0000L;
 
 
 uint32 BLOAD(Reg*);
 uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
 */
 
 // A Reg is a wrapper around a single Prog (one instruction) that holds
 uint32 allreg(uint32, Rgn*);
 void   paint1(Reg*, int);
 uint32 paint2(Reg*, int);
-void   paint3(Reg*, int, int32, int);
+void   paint3(Reg*, int, uint32, int);
 void   addreg(Adr*, int);
 void   dumpone(Flow*, int);
 void   dumpit(char*, Flow*, int);
 void   excise(Flow*);
 int    copyu(Prog*, Adr*, Adr*);
 
-int32  RtoB(int);
-int32  FtoB(int);
-int    BtoR(int32);
-int    BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int    BtoR(uint32);
+int    BtoF(uint32);
 
 /*
  * prog.c
 
 #include "opt.h"
 
 #define        NREGVAR 16      /* 8 integer + 8 floating */
-#define        REGBITS ((uint32)0xffff)
+#define        REGBITS ((uint64)0xffffull)
 /*c2go enum {
        NREGVAR = 16,
        REGBITS = (1<<NREGVAR) - 1,
                i = bnum(bit);
                node = var[i].node;
                n = var[i].name;
-               bit.b[i/32] &= ~(1L<<(i%32));
+               biclr(&bit, i);
 
                // disable all pieces of that variable
                for(i=0; i<nvar; i++) {
                        rgp->varno = i;
                        change = 0;
                        paint1(r, i);
-                       bit.b[i/32] &= ~(1L<<(i%32));
+                       biclr(&bit, i);
                        if(change <= 0)
                                continue;
                        rgp->cost = change;
                        break;
                for(v=n->opt; v!=nil; v=v->nextinnode) {
                        bn = v - var;
-                       r1->act.b[bn/32] |= 1L << (bn%32);
+                       biset(&r1->act, bn);
                }
                if(r1->f.prog->as == ACALL)
                        break;
                        for(z=0; z<BITS; z++) {
                                if(cal.b[z] == 0)
                                        continue;
-                               for(i=0; i<32; i++) {
-                                       if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+                               for(i=0; i<64; i++) {
+                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
                                                continue;
-                                       v = var+z*32+i;
+                                       v = var+z*64+i;
                                        if(v->node->opt == nil) // v represents fixed register, not Go variable
                                                continue;
 
                                        // This will set the bits at most twice, keeping the overall loop linear.
                                        v1 = v->node->opt;
                                        j = v1 - var;
-                                       if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+                                       if(v == v1 || !btest(&cal, j)) {
                                                for(; v1 != nil; v1 = v1->nextinnode) {
                                                        j = v1 - var;
-                                                       cal.b[j/32] |= 1<<(j&31);
+                                                       biset(&cal, j);
                                                }
                                        }
                                }
        Reg *r1;
        Prog *p;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L<<(bn%32);
+       z = bn/64;
+       bb = 1LL<<(bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
 {
        Reg *r1;
        int z;
-       uint32 bb, vreg, x;
+       uint64 bb, vreg, x;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        vreg = regbits;
        if(!(r->act.b[z] & bb))
                return vreg;
 }
 
 void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
 {
        Reg *r1;
        Prog *p;
        int z;
-       uint32 bb;
+       uint64 bb;
 
-       z = bn/32;
-       bb = 1L << (bn%32);
+       z = bn/64;
+       bb = 1LL << (bn%64);
        if(r->act.b[z] & bb)
                return;
        for(;;) {
        ostats.ncvtreg++;
 }
 
-int32
+uint32
 RtoB(int r)
 {
 
 }
 
 int
-BtoR(int32 b)
+BtoR(uint32 b)
 {
 
        b &= 0xffL;
        return bitno(b) + D_AX;
 }
 
-int32
+uint32
 FtoB(int f)
 {
        if(f < D_X0 || f > D_X7)
 }
 
 int
-BtoF(int32 b)
+BtoF(uint32 b)
 {
        b &= 0xFF00L;
        if(b == 0)
 
 bnum(Bits a)
 {
        int i;
-       int32 b;
+       uint64 b;
 
        for(i=0; i<BITS; i++)
                if(b = a.b[i])
-                       return 32*i + bitno(b);
+                       return 64*i + bitno(b);
        fatal("bad in bnum");
        return 0;
 }
        Bits c;
 
        c = zbits;
-       c.b[n/32] = 1L << (n%32);
+       c.b[n/64] = 1LL << (n%64);
        return c;
 }
 
-/*
+// btest reports whether bit n of *a is set (nonzero result) or clear (0).
 int
-bset(Bits a, uint n)
+btest(Bits *a, uint n)
 {
-       if(a.b[n/32] & (1L << (n%32)))
-               return 1;
-       return 0;
+       // Unsigned literal: shifting a signed 1LL by 63 would overflow into the sign bit.
+       return (a->b[n/64] & (1ULL << (n%64))) != 0;
+}
+
+// biset sets bit n of *a in place.
+void
+biset(Bits *a, uint n)
+{
+       a->b[n/64] |= 1ULL << (n%64);
+}
+
+// biclr clears bit n of *a in place.
+void
+biclr(Bits *a, uint n)
+{
+       a->b[n/64] &= ~(1ULL << (n%64));
 }
-*/
 
+// bitno returns the index of the lowest set bit of b; it calls fatal when b is zero.
 int
-bitno(int32 b)
+bitno(uint64 b)
 {
        int i;
 
-       for(i=0; i<32; i++)
-               if(b & (1L<<i))
+       for(i=0; i<64; i++)
+               if(b & (1ULL<<i))       // unsigned literal: 1LL<<63 would overflow a signed type
                        return i;
        fatal("bad in bitno");
        return 0;
                        if(var[i].offset != 0)
                                fmtprint(fp, "%+lld", (vlong)var[i].offset);
                }
-               bits.b[i/32] &= ~(1L << (i%32));
+               biclr(&bits, i);
        }
        return 0;
 }
 
        Ecomplit = 1<<11,       // type in composite literal
 };
 
-#define        BITS    5
-#define        NVAR    (BITS*sizeof(uint32)*8)
+#define        BITS    3       /* number of uint64 words in a Bits */
+#define        NVAR    (BITS*sizeof(uint64)*8) /* total number of bits a Bits holds */
 
 typedef        struct  Bits    Bits;
 struct Bits
 {
-       uint32  b[BITS];
+       uint64  b[BITS];        /* bit n lives in b[n/64] at bit position n%64 */
 };
 
 EXTERN Bits    zbits;
 Bits   band(Bits a, Bits b);
 int    bany(Bits *a);
 int    beq(Bits a, Bits b);
-int    bitno(int32 b);
+int    bitno(uint64 b);
 Bits   blsh(uint n);
 Bits   bnot(Bits a);
 int    bnum(Bits a);
 Bits   bor(Bits a, Bits b);
-int    bset(Bits a, uint n);
+int    btest(Bits *a, uint n);
+void   biset(Bits *a, uint n);
+void   biclr(Bits *a, uint n);
 
 /*
  *     bv.c