runtime: replace GC programs with simpler encoding, faster decoder

author Russ Cox <rsc@golang.org>

Fri, 8 May 2015 05:43:18 +0000 (01:43 -0400)

committer Russ Cox <rsc@golang.org>

Sat, 16 May 2015 00:38:17 +0000 (00:38 +0000)
author Russ Cox <rsc@golang.org>
Fri, 8 May 2015 05:43:18 +0000 (01:43 -0400)
committer Russ Cox <rsc@golang.org>
Sat, 16 May 2015 00:38:17 +0000 (00:38 +0000)
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go

index e25367b25ccfa08aeb206d44ca804657fd7b6dc0..946229d8273f3978b7bc46c4ffe879bace484436 100644 (file)
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -39,6 +39,7 @@ var bootstrapDirs = []string{
         "asm/internal/flags",
         "asm/internal/lex",
         "internal/asm",
+       "internal/gcprog",
         "internal/gc/big",
         "internal/gc",
         "internal/ld",
diff --git a/src/cmd/internal/gc/lex.go b/src/cmd/internal/gc/lex.go

index 92c079e1546c0087c150557c280b5d573271ff7b..f9211407fb6f1d8f824b808aac45f0f6ddc2c7d0 100644 (file)
--- a/src/cmd/internal/gc/lex.go
+++ b/src/cmd/internal/gc/lex.go
@@ -48,12 +48,13 @@ var debugtab = []struct {
         name string
         val  *int
  }{
+       {"append", &Debug_append},         // print information about append compilation
+       {"disablenil", &Disable_checknil}, // disable nil checks
+       {"gcprog", &Debug_gcprog},         // print dump of GC programs
         {"nil", &Debug_checknil},          // print information about nil checks
+       {"slice", &Debug_slice},           // print information about slice compilation
         {"typeassert", &Debug_typeassert}, // print information about type assertion inlining
-       {"disablenil", &Disable_checknil}, // disable nil checks
         {"wb", &Debug_wb},                 // print information about write barriers
-       {"append", &Debug_append},         // print information about append compilation
-       {"slice", &Debug_slice},           // print information about slice compilation
  }
  
  // Our own isdigit, isspace, isalpha, isalnum that take care
diff --git a/src/cmd/internal/gc/plive.go b/src/cmd/internal/gc/plive.go

index 977789f3e4a36d028b84df9d6c6d22d080b90dbd..b4d0699d1f52945ea3e3ecb93b99e11bd0ed0587 100644 (file)
--- a/src/cmd/internal/gc/plive.go
+++ b/src/cmd/internal/gc/plive.go
@@ -944,7 +944,7 @@ func onebitwalktype1(t *Type, xoffset *int64, bv Bvec) {
                 *xoffset += t.Width
  
         case TARRAY:
-               // The value of t->bound is -1 for slices types and >0 for
+               // The value of t->bound is -1 for slices types and >=0 for
                 // for fixed array types.  All other values are invalid.
                 if t.Bound < -1 {
                         Fatal("onebitwalktype1: invalid bound, %v", t)
diff --git a/src/cmd/internal/gc/reflect.go b/src/cmd/internal/gc/reflect.go

index 061b17b3ae3a36e37f038bf111e316fbf13e984b..6c0962f2582743457dde83cf5de6eb500b9d6d1f 100644 (file)
--- a/src/cmd/internal/gc/reflect.go
+++ b/src/cmd/internal/gc/reflect.go
@@ -5,8 +5,10 @@
  package gc
  
  import (
+       "cmd/internal/gcprog"
         "cmd/internal/obj"
         "fmt"
+       "os"
  )
  
  /*
@@ -771,6 +773,8 @@ func dcommontype(s *Sym, ot int, t *Type) int {
         // The linker magically takes the max of all the sizes.
         zero := Pkglookup("zerovalue", Runtimepkg)
  
+       gcsym, useGCProg, ptrdata := dgcsym(t)
+
         // We use size 0 here so we get the pointer to the zero value,
         // but don't allocate space for the zero value unless we need it.
         // TODO: how do we get this symbol into bss?  We really want
@@ -787,14 +791,14 @@ func dcommontype(s *Sym, ot int, t *Type) int {
         //              fieldAlign    uint8
         //              kind          uint8
         //              alg           unsafe.Pointer
-       //              gc            unsafe.Pointer
+       //              gcdata        unsafe.Pointer
         //              string        *string
         //              *extraType
         //              ptrToThis     *Type
         //              zero          unsafe.Pointer
         //      }
         ot = duintptr(s, ot, uint64(t.Width))
-       ot = duintptr(s, ot, uint64(typeptrdata(t)))
+       ot = duintptr(s, ot, uint64(ptrdata))
  
         ot = duint32(s, ot, typehash(t))
         ot = duint8(s, ot, 0) // unused
@@ -811,8 +815,6 @@ func dcommontype(s *Sym, ot int, t *Type) int {
         ot = duint8(s, ot, t.Align) // align
         ot = duint8(s, ot, t.Align) // fieldAlign
  
-       gcprog := usegcprog(t)
-
         i = kinds[t.Etype]
         if t.Etype == TARRAY && t.Bound < 0 {
                 i = obj.KindSlice
@@ -823,7 +825,7 @@ func dcommontype(s *Sym, ot int, t *Type) int {
         if isdirectiface(t) {
                 i |= obj.KindDirectIface
         }
-       if gcprog {
+       if useGCProg {
                 i |= obj.KindGCProg
         }
         ot = duint8(s, ot, uint8(i)) // kind
@@ -832,48 +834,7 @@ func dcommontype(s *Sym, ot int, t *Type) int {
         } else {
                 ot = dsymptr(s, ot, algsym, 0)
         }
-
-       // gc
-       if gcprog {
-               var gcprog1 *Sym
-               var gcprog0 *Sym
-               gengcprog(t, &gcprog0, &gcprog1)
-               if gcprog0 != nil {
-                       ot = dsymptr(s, ot, gcprog0, 0)
-               } else {
-                       ot = duintptr(s, ot, 0)
-               }
-               ot = dsymptr(s, ot, gcprog1, 0)
-       } else {
-               var gcmask [16]uint8
-               gengcmask(t, gcmask[:])
-               x1 := uint64(0)
-               for i := 0; i < 8; i++ {
-                       x1 = x1<<8 | uint64(gcmask[i])
-               }
-               var p string
-               if Widthptr == 4 {
-                       p = fmt.Sprintf("gcbits.0x%016x", x1)
-               } else {
-                       x2 := uint64(0)
-                       for i := 0; i < 8; i++ {
-                               x2 = x2<<8 | uint64(gcmask[i+8])
-                       }
-                       p = fmt.Sprintf("gcbits.0x%016x%016x", x1, x2)
-               }
-
-               sbits := Pkglookup(p, Runtimepkg)
-               if sbits.Flags&SymUniq == 0 {
-                       sbits.Flags |= SymUniq
-                       for i := 0; i < 2*Widthptr; i++ {
-                               duint8(sbits, i, gcmask[i])
-                       }
-                       ggloblsym(sbits, 2*int32(Widthptr), obj.DUPOK|obj.RODATA|obj.LOCAL)
-               }
-
-               ot = dsymptr(s, ot, sbits, 0)
-               ot = duintptr(s, ot, 0)
-       }
+       ot = dsymptr(s, ot, gcsym, 0)
  
         p := Tconv(t, obj.FmtLeft|obj.FmtUnsigned)
  
@@ -1419,228 +1380,193 @@ func dalgsym(t *Type) *Sym {
         return s
  }
  
-func usegcprog(t *Type) bool {
-       if !haspointers(t) {
-               return false
-       }
-       if t.Width == BADWIDTH {
-               dowidth(t)
+// maxPtrmaskBytes is the maximum length of a GC ptrmask bitmap,
+// which holds 1-bit entries describing where pointers are in a given type.
+// 16 bytes is enough to describe 128 pointer-sized words, 512 or 1024 bytes
+// depending on the system. Above this length, the GC information is
+// recorded as a GC program, which can express repetition compactly.
+// In either form, the information is used by the runtime to initialize the
+// heap bitmap, and for large types (like 128 or more words), they are
+// roughly the same speed. GC programs are never much larger and often
+// more compact. (If large arrays are involved, they can be arbitrarily more
+// compact.)
+//
+// The cutoff must be large enough that any allocation large enough to
+// use a GC program is large enough that it does not share heap bitmap
+// bytes with any other objects, allowing the GC program execution to
+// assume an aligned start and not use atomic operations. In the current
+// runtime, this means all malloc size classes larger than the cutoff must
+// be multiples of four words. On 32-bit systems that's 16 bytes, and
+// all size classes >= 16 bytes are 16-byte aligned, so no real constraint.
+// On 64-bit systems, that's 32 bytes, and 32-byte alignment is guaranteed
+// for size classes >= 256 bytes. On a 64-bit sytem, 256 bytes allocated
+// is 32 pointers, the bits for which fit in 4 bytes. So maxPtrmaskBytes
+// must be >= 4.
+//
+// We use 16 because the GC programs do have some constant overhead
+// to get started, and processing 128 pointers seems to be enough to
+// amortize that overhead well.
+const maxPtrmaskBytes = 16
+
+// dgcsym emits and returns a data symbol containing GC information for type t,
+// along with a boolean reporting whether the UseGCProg bit should be set in
+// the type kind, and the ptrdata field to record in the reflect type information.
+func dgcsym(t *Type) (sym *Sym, useGCProg bool, ptrdata int64) {
+       ptrdata = typeptrdata(t)
+       if ptrdata/int64(Widthptr) <= maxPtrmaskBytes*8 {
+               sym = dgcptrmask(t)
+               return
         }
  
-       // Calculate size of the unrolled GC mask.
-       nptr := typeptrdata(t) / int64(Widthptr)
+       useGCProg = true
+       sym, ptrdata = dgcprog(t)
+       return
+}
  
-       // Decide whether to use unrolled GC mask or GC program.
-       // We could use a more elaborate condition, but this seems to work well in practice.
-       // For small objects, the GC program can't give significant reduction.
-       return nptr > int64(2*Widthptr*8)
+// dgcptrmask emits and returns the symbol containing a pointer mask for type t.
+func dgcptrmask(t *Type) *Sym {
+       ptrmask := make([]byte, (typeptrdata(t)/int64(Widthptr)+7)/8)
+       fillptrmask(t, ptrmask)
+       p := fmt.Sprintf("gcbits.%x", ptrmask)
+
+       sym := Pkglookup(p, Runtimepkg)
+       if sym.Flags&SymUniq == 0 {
+               sym.Flags |= SymUniq
+               for i, x := range ptrmask {
+                       duint8(sym, i, x)
+               }
+               ggloblsym(sym, int32(len(ptrmask)), obj.DUPOK|obj.RODATA|obj.LOCAL)
+       }
+       return sym
  }
  
-// Generates GC bitmask (1 bit per word).
-func gengcmask(t *Type, gcmask []byte) {
-       for i := int64(0); i < 16; i++ {
-               gcmask[i] = 0
+// fillptrmask fills in ptrmask with 1s corresponding to the
+// word offsets in t that hold pointers.
+// ptrmask is assumed to fit at least typeptrdata(t)/Widthptr bits.
+func fillptrmask(t *Type, ptrmask []byte) {
+       for i := range ptrmask {
+               ptrmask[i] = 0
         }
         if !haspointers(t) {
                 return
         }
  
-       vec := bvalloc(int32(2 * Widthptr * 8))
+       vec := bvalloc(8 * int32(len(ptrmask)))
         xoffset := int64(0)
         onebitwalktype1(t, &xoffset, vec)
  
         nptr := typeptrdata(t) / int64(Widthptr)
         for i := int64(0); i < nptr; i++ {
                 if bvget(vec, int32(i)) == 1 {
-                       gcmask[i/8] |= 1 << (uint(i) % 8)
+                       ptrmask[i/8] |= 1 << (uint(i) % 8)
                 }
         }
  }
  
-// Helper object for generation of GC programs.
-type ProgGen struct {
-       s        *Sym
-       datasize int32
-       data     [256 / 8]uint8
-       ot       int64
+// dgcprog emits and returns the symbol containing a GC program for type t
+// along with the size of the data described by the program (in the range [typeptrdata(t), t.Width]).
+// In practice, the size is typeptrdata(t) except for non-trivial arrays.
+// For non-trivial arrays, the program describes the full t.Width size.
+func dgcprog(t *Type) (*Sym, int64) {
+       dowidth(t)
+       if t.Width == BADWIDTH {
+               Fatal("dgcprog: %v badwidth", t)
+       }
+       sym := typesymprefix(".gcprog", t)
+       var p GCProg
+       p.init(sym)
+       p.emit(t, 0)
+       offset := p.w.BitIndex() * int64(Widthptr)
+       p.end()
+       if ptrdata := typeptrdata(t); offset < ptrdata || offset > t.Width {
+               Fatal("dgcprog: %v: offset=%d but ptrdata=%d size=%d", t, offset, ptrdata, t.Width)
+       }
+       return sym, offset
  }
  
-func proggeninit(g *ProgGen, s *Sym) {
-       g.s = s
-       g.datasize = 0
-       g.ot = 0
-       g.data = [256 / 8]uint8{}
+type GCProg struct {
+       sym    *Sym
+       symoff int
+       w      gcprog.Writer
  }
  
-func proggenemit(g *ProgGen, v uint8) {
-       g.ot = int64(duint8(g.s, int(g.ot), v))
-}
+var Debug_gcprog int // set by -d gcprog
  
-// Emits insData block from g->data.
-func proggendataflush(g *ProgGen) {
-       if g.datasize == 0 {
-               return
+func (p *GCProg) init(sym *Sym) {
+       p.sym = sym
+       p.symoff = 4 // first 4 bytes hold program length
+       p.w.Init(p.writeByte)
+       if Debug_gcprog > 0 {
+               fmt.Fprintf(os.Stderr, "compile: start GCProg for %v\n", sym)
+               p.w.Debug(os.Stderr)
         }
-       proggenemit(g, obj.InsData)
-       proggenemit(g, uint8(g.datasize))
-       s := (g.datasize + 7) / 8
-       for i := int32(0); i < s; i++ {
-               proggenemit(g, g.data[i])
-       }
-       g.datasize = 0
-       g.data = [256 / 8]uint8{}
  }
  
-func proggendata(g *ProgGen, d uint8) {
-       g.data[g.datasize/8] |= d << uint(g.datasize%8)
-       g.datasize++
-       if g.datasize == 255 {
-               proggendataflush(g)
-       }
+func (p *GCProg) writeByte(x byte) {
+       p.symoff = duint8(p.sym, p.symoff, x)
  }
  
-// Skip v bytes due to alignment, etc.
-func proggenskip(g *ProgGen, off int64, v int64) {
-       for i := off; i < off+v; i++ {
-               if (i % int64(Widthptr)) == 0 {
-                       proggendata(g, 0)
-               }
+func (p *GCProg) end() {
+       p.w.End()
+       duint32(p.sym, 0, uint32(p.symoff-4))
+       ggloblsym(p.sym, int32(p.symoff), obj.DUPOK|obj.RODATA|obj.LOCAL)
+       if Debug_gcprog > 0 {
+               fmt.Fprintf(os.Stderr, "compile: end GCProg for %v\n", p.sym)
         }
  }
  
-// Emit insArray instruction.
-func proggenarray(g *ProgGen, len int64) {
-       proggendataflush(g)
-       proggenemit(g, obj.InsArray)
-       for i := int32(0); i < int32(Widthptr); i, len = i+1, len>>8 {
-               proggenemit(g, uint8(len))
+func (p *GCProg) emit(t *Type, offset int64) {
+       dowidth(t)
+       if !haspointers(t) {
+               return
         }
-}
-
-func proggenarrayend(g *ProgGen) {
-       proggendataflush(g)
-       proggenemit(g, obj.InsArrayEnd)
-}
-
-func proggenfini(g *ProgGen) int64 {
-       proggendataflush(g)
-       proggenemit(g, obj.InsEnd)
-       return g.ot
-}
-
-// Generates GC program for large types.
-func gengcprog(t *Type, pgc0 **Sym, pgc1 **Sym) {
-       nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
-       size := nptr + 1 // unroll flag in the beginning, used by runtime (see runtime.markallocated)
-
-       // emity space in BSS for unrolled program
-       *pgc0 = nil
-
-       // Don't generate it if it's too large, runtime will unroll directly into GC bitmap.
-       if size <= obj.MaxGCMask {
-               gc0 := typesymprefix(".gc", t)
-               ggloblsym(gc0, int32(size), obj.DUPOK|obj.NOPTR)
-               *pgc0 = gc0
+       if t.Width == int64(Widthptr) {
+               p.w.Ptr(offset / int64(Widthptr))
+               return
         }
-
-       // program in RODATA
-       gc1 := typesymprefix(".gcprog", t)
-
-       var g ProgGen
-       proggeninit(&g, gc1)
-       xoffset := int64(0)
-       gengcprog1(&g, t, &xoffset)
-       ot := proggenfini(&g)
-       ggloblsym(gc1, int32(ot), obj.DUPOK|obj.RODATA)
-       *pgc1 = gc1
-}
-
-// Recursively walks type t and writes GC program into g.
-func gengcprog1(g *ProgGen, t *Type, xoffset *int64) {
         switch t.Etype {
-       case TINT8,
-               TUINT8,
-               TINT16,
-               TUINT16,
-               TINT32,
-               TUINT32,
-               TINT64,
-               TUINT64,
-               TINT,
-               TUINT,
-               TUINTPTR,
-               TBOOL,
-               TFLOAT32,
-               TFLOAT64,
-               TCOMPLEX64,
-               TCOMPLEX128:
-               proggenskip(g, *xoffset, t.Width)
-               *xoffset += t.Width
-
-       case TPTR32,
-               TPTR64,
-               TUNSAFEPTR,
-               TFUNC,
-               TCHAN,
-               TMAP:
-               proggendata(g, 1)
-               *xoffset += t.Width
+       default:
+               Fatal("GCProg.emit: unexpected type %v", t)
  
         case TSTRING:
-               proggendata(g, 1)
-               proggendata(g, 0)
-               *xoffset += t.Width
+               p.w.Ptr(offset / int64(Widthptr))
  
-               // Assuming IfacePointerOnly=1.
         case TINTER:
-               proggendata(g, 1)
-               proggendata(g, 1)
-               *xoffset += t.Width
+               p.w.Ptr(offset / int64(Widthptr))
+               p.w.Ptr(offset/int64(Widthptr) + 1)
  
         case TARRAY:
                 if Isslice(t) {
-                       proggendata(g, 1)
-                       proggendata(g, 0)
-                       proggendata(g, 0)
-               } else {
-                       t1 := t.Type
-                       if t1.Width == 0 {
-                       }
-                       // ignore
-                       if t.Bound <= 1 || t.Bound*t1.Width < int64(32*Widthptr) {
-                               for i := int64(0); i < t.Bound; i++ {
-                                       gengcprog1(g, t1, xoffset)
-                               }
-                       } else if !haspointers(t1) {
-                               n := t.Width
-                               n -= -*xoffset & (int64(Widthptr) - 1) // skip to next ptr boundary
-                               proggenarray(g, (n+int64(Widthptr)-1)/int64(Widthptr))
-                               proggendata(g, 0)
-                               proggenarrayend(g)
-                               *xoffset -= (n+int64(Widthptr)-1)/int64(Widthptr)*int64(Widthptr) - t.Width
-                       } else {
-                               proggenarray(g, t.Bound)
-                               gengcprog1(g, t1, xoffset)
-                               *xoffset += (t.Bound - 1) * t1.Width
-                               proggenarrayend(g)
+                       p.w.Ptr(offset / int64(Widthptr))
+                       return
+               }
+               if t.Bound == 0 {
+                       // should have been handled by haspointers check above
+                       Fatal("GCProg.emit: empty array")
+               }
+
+               // Flatten array-of-array-of-array to just a big array by multiplying counts.
+               count := t.Bound
+               elem := t.Type
+               for Isfixedarray(elem) {
+                       count *= elem.Bound
+                       elem = elem.Type
+               }
+
+               if !p.w.ShouldRepeat(elem.Width/int64(Widthptr), count) {
+                       // Cheaper to just emit the bits.
+                       for i := int64(0); i < count; i++ {
+                               p.emit(elem, offset+i*elem.Width)
                         }
+                       return
                 }
+               p.emit(elem, offset)
+               p.w.ZeroUntil((offset + elem.Width) / int64(Widthptr))
+               p.w.Repeat(elem.Width/int64(Widthptr), count-1)
  
         case TSTRUCT:
-               o := int64(0)
-               var fieldoffset int64
                 for t1 := t.Type; t1 != nil; t1 = t1.Down {
-                       fieldoffset = t1.Width
-                       proggenskip(g, *xoffset, fieldoffset-o)
-                       *xoffset += fieldoffset - o
-                       gengcprog1(g, t1.Type, xoffset)
-                       o = fieldoffset + t1.Type.Width
+                       p.emit(t1.Type, offset+t1.Width)
                 }
-
-               proggenskip(g, *xoffset, t.Width-o)
-               *xoffset += t.Width - o
-
-       default:
-               Fatal("gengcprog1: unexpected type, %v", t)
         }
  }
diff --git a/src/cmd/internal/gcprog/gcprog.go b/src/cmd/internal/gcprog/gcprog.go

new file mode 100644 (file)

index 0000000..5845f7d
--- /dev/null
+++ b/src/cmd/internal/gcprog/gcprog.go
@@ -0,0 +1,298 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gcprog implements an encoder for packed GC pointer bitmaps,
+// known as GC programs.
+//
+// Program Format
+//
+// The GC program encodes a sequence of 0 and 1 bits indicating scalar or pointer words in an object.
+// The encoding is a simple Lempel-Ziv program, with codes to emit literal bits and to repeat the
+// last n bits c times.
+//
+// The possible codes are:
+//
+//     00000000: stop
+//     0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes, least significant bit first
+//     10000000 n c: repeat the previous n bits c times; n, c are varints
+//     1nnnnnnn c: repeat the previous n bits c times; c is a varint
+//
+// The numbers n and c, when they follow a code, are encoded as varints
+// using the same encoding as encoding/binary's Uvarint.
+//
+package gcprog
+
+import (
+       "fmt"
+       "io"
+)
+
+const progMaxLiteral = 127 // maximum n for literal n bit code
+
+// A Writer is an encoder for GC programs.
+//
+// The typical use of a Writer is to call Init, maybe call Debug,
+// make a sequence of Ptr, Advance, Repeat, and Append calls
+// to describe the data type, and then finally call End.
+type Writer struct {
+       writeByte func(byte)
+       symoff    int
+       index     int64
+       b         [progMaxLiteral]byte
+       nb        int
+       debug     io.Writer
+       debugBuf  []byte
+}
+
+// Init initializes w to write a new GC program
+// by calling writeByte for each byte in the program.
+func (w *Writer) Init(writeByte func(byte)) {
+       w.writeByte = writeByte
+}
+
+// Debug causes the writer to print a debugging trace to out
+// during future calls to methods like Ptr, Advance, and End.
+// It also enables debugging checks during the encoding.
+func (w *Writer) Debug(out io.Writer) {
+       w.debug = out
+}
+
+// BitIndex returns the number of bits written to the bit stream so far.
+func (w *Writer) BitIndex() int64 {
+       return w.index
+}
+
+// byte writes the byte x to the output.
+func (w *Writer) byte(x byte) {
+       if w.debug != nil {
+               w.debugBuf = append(w.debugBuf, x)
+       }
+       w.writeByte(x)
+}
+
+// End marks the end of the program, writing any remaining bytes.
+func (w *Writer) End() {
+       w.flushlit()
+       w.byte(0)
+       if w.debug != nil {
+               index := progbits(w.debugBuf)
+               if index != w.index {
+                       println("gcprog: End wrote program for", index, "bits, but current index is", w.index)
+                       panic("gcprog: out of sync")
+               }
+       }
+}
+
+// Ptr emits a 1 into the bit stream at the given bit index.
+// that is, it records that the index'th word in the object memory is a pointer.
+// Any bits between the current index and the new index
+// are set to zero, meaning the corresponding words are scalars.
+func (w *Writer) Ptr(index int64) {
+       if index < w.index {
+               println("gcprog: Ptr at index", index, "but current index is", w.index)
+               panic("gcprog: invalid Ptr index")
+       }
+       w.ZeroUntil(index)
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "gcprog: ptr at %d\n", index)
+       }
+       w.lit(1)
+}
+
+// ShouldRepeat reports whether it would be worthwhile to
+// use a Repeat to describe c elements of n bits each,
+// compared to just emitting c copies of the n-bit description.
+func (w *Writer) ShouldRepeat(n, c int64) bool {
+       // Should we lay out the bits directly instead of
+       // encoding them as a repetition? Certainly if count==1,
+       // since there's nothing to repeat, but also if the total
+       // size of the plain pointer bits for the type will fit in
+       // 4 or fewer bytes, since using a repetition will require
+       // flushing the current bits plus at least one byte for
+       // the repeat size and one for the repeat count.
+       return c > 1 && c*n > 4*8
+}
+
+// Repeat emits an instruction to repeat the description
+// of the last n words c times (including the initial description, c+1 times in total).
+func (w *Writer) Repeat(n, c int64) {
+       if n == 0 || c == 0 {
+               return
+       }
+       w.flushlit()
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "gcprog: repeat %d × %d\n", n, c)
+       }
+       if n < 128 {
+               w.byte(0x80 | byte(n))
+       } else {
+               w.byte(0x80)
+               w.varint(n)
+       }
+       w.varint(c)
+       w.index += n * c
+}
+
+// ZeroUntil adds zeros to the bit stream until reaching the given index;
+// that is, it records that the words from the most recent pointer until
+// the index'th word are scalars.
+// ZeroUntil is usually called in preparation for a call to Repeat, Append, or End.
+func (w *Writer) ZeroUntil(index int64) {
+       if index < w.index {
+               println("gcprog: Advance", index, "but index is", w.index)
+               panic("gcprog: invalid Advance index")
+       }
+       skip := (index - w.index)
+       if skip == 0 {
+               return
+       }
+       if skip < 4*8 {
+               if w.debug != nil {
+                       fmt.Fprintf(w.debug, "gcprog: advance to %d by literals\n", index)
+               }
+               for i := int64(0); i < skip; i++ {
+                       w.lit(0)
+               }
+               return
+       }
+
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "gcprog: advance to %d by repeat\n", index)
+       }
+       w.lit(0)
+       w.flushlit()
+       w.Repeat(1, skip-1)
+}
+
+// Append emits the given GC program into the current output.
+// The caller asserts that the program emits n bits (describes n words),
+// and Append panics if that is not true.
+func (w *Writer) Append(prog []byte, n int64) {
+       w.flushlit()
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "gcprog: append prog for %d ptrs\n", n)
+               fmt.Fprintf(w.debug, "\t")
+       }
+       n1 := progbits(prog)
+       if n1 != n {
+               panic("gcprog: wrong bit count in append")
+       }
+       // The last byte of the prog terminates the program.
+       // Don't emit that, or else our own program will end.
+       for i, x := range prog[:len(prog)-1] {
+               if w.debug != nil {
+                       if i > 0 {
+                               fmt.Fprintf(w.debug, " ")
+                       }
+                       fmt.Fprintf(w.debug, "%02x", x)
+               }
+               w.byte(x)
+       }
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "\n")
+       }
+       w.index += n
+}
+
+// progbits returns the length of the bit stream encoded by the program p.
+func progbits(p []byte) int64 {
+       var n int64
+       for len(p) > 0 {
+               x := p[0]
+               p = p[1:]
+               if x == 0 {
+                       break
+               }
+               if x&0x80 == 0 {
+                       count := x &^ 0x80
+                       n += int64(count)
+                       p = p[(count+7)/8:]
+                       continue
+               }
+               nbit := int64(x &^ 0x80)
+               if nbit == 0 {
+                       nbit, p = readvarint(p)
+               }
+               var count int64
+               count, p = readvarint(p)
+               n += nbit * count
+       }
+       if len(p) > 0 {
+               println("gcprog: found end instruction after", n, "ptrs, with", len(p), "bytes remaining")
+               panic("gcprog: extra data at end of program")
+       }
+       return n
+}
+
+// readvarint reads a varint from p, returning the value and the remainder of p.
+func readvarint(p []byte) (int64, []byte) {
+       var v int64
+       var nb uint
+       for {
+               c := p[0]
+               p = p[1:]
+               v |= int64(c&^0x80) << nb
+               nb += 7
+               if c&0x80 == 0 {
+                       break
+               }
+       }
+       return v, p
+}
+
+// lit adds a single literal bit to w.
+func (w *Writer) lit(x byte) {
+       if w.nb == progMaxLiteral {
+               w.flushlit()
+       }
+       w.b[w.nb] = x
+       w.nb++
+       w.index++
+}
+
+// varint emits the varint encoding of x.
+func (w *Writer) varint(x int64) {
+       if x < 0 {
+               panic("gcprog: negative varint")
+       }
+       for x >= 0x80 {
+               w.byte(byte(0x80 | x))
+               x >>= 7
+       }
+       w.byte(byte(x))
+}
+
+// flushlit flushes any pending literal bits.
+func (w *Writer) flushlit() {
+       if w.nb == 0 {
+               return
+       }
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "gcprog: flush %d literals\n", w.nb)
+               fmt.Fprintf(w.debug, "\t%v\n", w.b[:w.nb])
+               fmt.Fprintf(w.debug, "\t%02x", byte(w.nb))
+       }
+       w.byte(byte(w.nb))
+       var bits uint8
+       for i := 0; i < w.nb; i++ {
+               bits |= w.b[i] << uint(i%8)
+               if (i+1)%8 == 0 {
+                       if w.debug != nil {
+                               fmt.Fprintf(w.debug, " %02x", bits)
+                       }
+                       w.byte(bits)
+                       bits = 0
+               }
+       }
+       if w.nb%8 != 0 {
+               if w.debug != nil {
+                       fmt.Fprintf(w.debug, " %02x", bits)
+               }
+               w.byte(bits)
+       }
+       if w.debug != nil {
+               fmt.Fprintf(w.debug, "\n")
+       }
+       w.nb = 0
+}
diff --git a/src/cmd/internal/ld/data.go b/src/cmd/internal/ld/data.go

index b65b667f98b6fdaede693de855c403539ba02f88..f9aacf0e196f0b594c7e6e765e6c2d23491dda0c 100644 (file)
--- a/src/cmd/internal/ld/data.go
+++ b/src/cmd/internal/ld/data.go
@@ -32,9 +32,11 @@
  package ld
  
  import (
+       "cmd/internal/gcprog"
         "cmd/internal/obj"
         "fmt"
         "log"
+       "os"
         "strings"
  )
  
@@ -1044,141 +1046,65 @@ func maxalign(s *LSym, type_ int) int32 {
         return max
  }
  
-// Helper object for building GC type programs.
-type ProgGen struct {
-       s        *LSym
-       datasize int32
-       data     [256 / 8]uint8
-       pos      int64
-}
-
-func proggeninit(g *ProgGen, s *LSym) {
-       g.s = s
-       g.datasize = 0
-       g.pos = 0
-       g.data = [256 / 8]uint8{}
-}
+const debugGCProg = false
  
-func proggenemit(g *ProgGen, v uint8) {
-       Adduint8(Ctxt, g.s, v)
+type GCProg struct {
+       sym *LSym
+       w   gcprog.Writer
  }
  
-// Writes insData block from g->data.
-func proggendataflush(g *ProgGen) {
-       if g.datasize == 0 {
-               return
+func (p *GCProg) Init(name string) {
+       p.sym = Linklookup(Ctxt, name, 0)
+       p.w.Init(p.writeByte)
+       if debugGCProg {
+               fmt.Fprintf(os.Stderr, "ld: start GCProg %s\n", name)
+               p.w.Debug(os.Stderr)
         }
-       proggenemit(g, obj.InsData)
-       proggenemit(g, uint8(g.datasize))
-       s := (g.datasize + 7) / 8
-       for i := int32(0); i < s; i++ {
-               proggenemit(g, g.data[i])
-       }
-       g.datasize = 0
-       g.data = [256 / 8]uint8{}
  }
  
-func proggendata(g *ProgGen, d uint8) {
-       g.data[g.datasize/8] |= d << uint(g.datasize%8)
-       g.datasize++
-       if g.datasize == 255 {
-               proggendataflush(g)
-       }
+func (p *GCProg) writeByte(x byte) {
+       Adduint8(Ctxt, p.sym, x)
  }
  
-// Skip v bytes due to alignment, etc.
-func proggenskip(g *ProgGen, off int64, v int64) {
-       for i := off; i < off+v; i++ {
-               if (i % int64(Thearch.Ptrsize)) == 0 {
-                       proggendata(g, 0)
-               }
+func (p *GCProg) End(size int64) {
+       p.w.ZeroUntil(size / int64(Thearch.Ptrsize))
+       p.w.End()
+       if debugGCProg {
+               fmt.Fprintf(os.Stderr, "ld: end GCProg\n")
         }
  }
  
-// Emit insArray instruction.
-func proggenarray(g *ProgGen, length int64) {
-       var i int32
-
-       proggendataflush(g)
-       proggenemit(g, obj.InsArray)
-       for i = 0; i < int32(Thearch.Ptrsize); i, length = i+1, length>>8 {
-               proggenemit(g, uint8(length))
-       }
-}
-
-func proggenarrayend(g *ProgGen) {
-       proggendataflush(g)
-       proggenemit(g, obj.InsArrayEnd)
-}
-
-func proggenfini(g *ProgGen, size int64) {
-       proggenskip(g, g.pos, size-g.pos)
-       proggendataflush(g)
-       proggenemit(g, obj.InsEnd)
-}
-
-// This function generates GC pointer info for global variables.
-func proggenaddsym(g *ProgGen, s *LSym) {
-       if s.Size == 0 {
+func (p *GCProg) AddSym(s *LSym) {
+       typ := s.Gotype
+       // Things without pointers should be in SNOPTRDATA or SNOPTRBSS;
+       // everything we see should have pointers and should therefore have a type.
+       if typ == nil {
+               Diag("missing Go type information for global symbol: %s size %d", s.Name, int(s.Size))
                 return
         }
  
-       // Skip alignment hole from the previous symbol.
-       proggenskip(g, g.pos, s.Value-g.pos)
-       g.pos = s.Value
+       ptrsize := int64(Thearch.Ptrsize)
+       nptr := decodetype_ptrdata(typ) / ptrsize
  
-       if s.Gotype == nil && s.Size >= int64(Thearch.Ptrsize) {
-               Diag("missing Go type information for global symbol: %s size %d", s.Name, int(s.Size))
-               return
+       if debugGCProg {
+               fmt.Fprintf(os.Stderr, "gcprog sym: %s at %d (ptr=%d+%d)\n", s.Name, s.Value, s.Value/ptrsize, nptr)
         }
  
-       if s.Gotype == nil || decodetype_noptr(s.Gotype) != 0 || s.Size < int64(Thearch.Ptrsize) || s.Name[0] == '.' {
-               // no scan
-               if s.Size < int64(32*Thearch.Ptrsize) {
-                       // Emit small symbols as data.
-                       // This case also handles unaligned and tiny symbols, so tread carefully.
-                       for i := s.Value; i < s.Value+s.Size; i++ {
-                               if (i % int64(Thearch.Ptrsize)) == 0 {
-                                       proggendata(g, 0)
-                               }
+       if decodetype_usegcprog(typ) == 0 {
+               // Copy pointers from mask into program.
+               mask := decodetype_gcmask(typ)
+               for i := int64(0); i < nptr; i++ {
+                       if (mask[i/8]>>uint(i%8))&1 != 0 {
+                               p.w.Ptr(s.Value/ptrsize + i)
                         }
-               } else {
-                       // Emit large symbols as array.
-                       if (s.Size%int64(Thearch.Ptrsize) != 0) || (g.pos%int64(Thearch.Ptrsize) != 0) {
-                               Diag("proggenaddsym: unaligned noscan symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
-                       }
-                       proggenarray(g, s.Size/int64(Thearch.Ptrsize))
-                       proggendata(g, 0)
-                       proggenarrayend(g)
-               }
-               g.pos = s.Value + s.Size
-       } else if decodetype_usegcprog(s.Gotype) != 0 {
-               // gc program, copy directly
-               // TODO(rsc): Maybe someday the gc program will only describe
-               // the first decodetype_ptrdata(s.Gotype) bytes instead of the full size.
-               proggendataflush(g)
-               gcprog := decodetype_gcprog(s.Gotype)
-               size := decodetype_size(s.Gotype)
-               if (size%int64(Thearch.Ptrsize) != 0) || (g.pos%int64(Thearch.Ptrsize) != 0) {
-                       Diag("proggenaddsym: unaligned gcprog symbol %s: size=%d pos=%d", s.Name, size, g.pos)
-               }
-               for i := int64(0); i < int64(len(gcprog.P)-1); i++ {
-                       proggenemit(g, uint8(gcprog.P[i]))
-               }
-               g.pos = s.Value + size
-       } else {
-               // gc mask, it's small so emit as data
-               mask := decodetype_gcmask(s.Gotype)
-               ptrdata := decodetype_ptrdata(s.Gotype)
-               if (ptrdata%int64(Thearch.Ptrsize) != 0) || (g.pos%int64(Thearch.Ptrsize) != 0) {
-                       Diag("proggenaddsym: unaligned gcmask symbol %s: size=%d pos=%d", s.Name, ptrdata, g.pos)
                 }
-               for i := int64(0); i < ptrdata; i += int64(Thearch.Ptrsize) {
-                       word := uint(i / int64(Thearch.Ptrsize))
-                       proggendata(g, (mask[word/8]>>(word%8))&1)
-               }
-               g.pos = s.Value + ptrdata
+               return
         }
+
+       // Copy program.
+       prog := decodetype_gcprog(typ)
+       p.w.ZeroUntil(s.Value / ptrsize)
+       p.w.Append(prog.P[4:prog.Size], nptr)
  }
  
  func growdatsize(datsizep *int64, s *LSym) {
@@ -1386,15 +1312,13 @@ func dodata() {
  
         /* data */
         sect = addsection(&Segdata, ".data", 06)
-
         sect.Align = maxalign(s, obj.SBSS-1)
         datsize = Rnd(datsize, int64(sect.Align))
         sect.Vaddr = uint64(datsize)
         Linklookup(Ctxt, "runtime.data", 0).Sect = sect
         Linklookup(Ctxt, "runtime.edata", 0).Sect = sect
-       gcdata := Linklookup(Ctxt, "runtime.gcdata", 0)
-       var gen ProgGen
-       proggeninit(&gen, gcdata)
+       var gc GCProg
+       gc.Init("runtime.gcdata")
         for ; s != nil && s.Type < obj.SBSS; s = s.Next {
                 if s.Type == obj.SINITARR {
                         Ctxt.Cursym = s
@@ -1405,33 +1329,30 @@ func dodata() {
                 s.Type = obj.SDATA
                 datsize = aligndatsize(datsize, s)
                 s.Value = int64(uint64(datsize) - sect.Vaddr)
-               proggenaddsym(&gen, s) // gc
+               gc.AddSym(s)
                 growdatsize(&datsize, s)
         }
-
         sect.Length = uint64(datsize) - sect.Vaddr
-       proggenfini(&gen, int64(sect.Length)) // gc
+       gc.End(int64(sect.Length))
  
         /* bss */
         sect = addsection(&Segdata, ".bss", 06)
-
         sect.Align = maxalign(s, obj.SNOPTRBSS-1)
         datsize = Rnd(datsize, int64(sect.Align))
         sect.Vaddr = uint64(datsize)
         Linklookup(Ctxt, "runtime.bss", 0).Sect = sect
         Linklookup(Ctxt, "runtime.ebss", 0).Sect = sect
-       gcbss := Linklookup(Ctxt, "runtime.gcbss", 0)
-       proggeninit(&gen, gcbss)
+       gc = GCProg{}
+       gc.Init("runtime.gcbss")
         for ; s != nil && s.Type < obj.SNOPTRBSS; s = s.Next {
                 s.Sect = sect
                 datsize = aligndatsize(datsize, s)
                 s.Value = int64(uint64(datsize) - sect.Vaddr)
-               proggenaddsym(&gen, s) // gc
+               gc.AddSym(s)
                 growdatsize(&datsize, s)
         }
-
         sect.Length = uint64(datsize) - sect.Vaddr
-       proggenfini(&gen, int64(sect.Length)) // gc
+       gc.End(int64(sect.Length))
  
         /* pointer-free bss */
         sect = addsection(&Segdata, ".noptrbss", 06)
diff --git a/src/cmd/internal/ld/decodesym.go b/src/cmd/internal/ld/decodesym.go

index b9333857fdc5dac52f79fafbae78ed9249219db5..fcc664dde735da365f044d6492384a3f915fc229 100644 (file)
--- a/src/cmd/internal/ld/decodesym.go
+++ b/src/cmd/internal/ld/decodesym.go
@@ -44,7 +44,7 @@ func decode_inuxi(p []byte, sz int) uint64 {
  // commonsize returns the size of the common prefix for all type
  // structures (runtime._type).
  func commonsize() int {
-       return 9*Thearch.Ptrsize + 8
+       return 8*Thearch.Ptrsize + 8
  }
  
  // Type.commonType.kind
@@ -79,7 +79,7 @@ func decodetype_gcprog(s *LSym) *LSym {
                 x := "type..gcprog." + s.Name[5:]
                 return Linklookup(Ctxt, x, 0)
         }
-       return decode_reloc_sym(s, 2*int32(Thearch.Ptrsize)+8+2*int32(Thearch.Ptrsize))
+       return decode_reloc_sym(s, 2*int32(Thearch.Ptrsize)+8+1*int32(Thearch.Ptrsize))
  }
  
  func decodetype_gcprog_shlib(s *LSym) uint64 {
diff --git a/src/cmd/internal/ld/lib.go b/src/cmd/internal/ld/lib.go

index a0d03ef22d98c1b6c521f090d4000209180ce030..753e8eebd8c4b5f933215e0278588efde44dd800 100644 (file)
--- a/src/cmd/internal/ld/lib.go
+++ b/src/cmd/internal/ld/lib.go
@@ -1200,7 +1200,7 @@ func ldshlibsyms(shlib string) {
                 if strings.HasPrefix(s.Name, "_") {
                         continue
                 }
-               if strings.HasPrefix(s.Name, "runtime.gcbits.0x") {
+               if strings.HasPrefix(s.Name, "runtime.gcbits.") {
                         gcmasks[s.Value] = readelfsymboldata(f, &s)
                 }
                 if s.Name == "go.link.abihashbytes" {
diff --git a/src/cmd/internal/ld/link.go b/src/cmd/internal/ld/link.go

index a314ca13706f09bdf86928f9ed7a20340a1c35b0..3098147819f118f321753e948cbf846112f7a534 100644 (file)
--- a/src/cmd/internal/ld/link.go
+++ b/src/cmd/internal/ld/link.go
@@ -34,6 +34,7 @@ import (
         "cmd/internal/obj"
         "debug/elf"
         "encoding/binary"
+       "fmt"
  )
  
  type LSym struct {
@@ -86,6 +87,13 @@ type LSym struct {
         gcmask      []byte
  }
  
+func (s *LSym) String() string {
+       if s.Version == 0 {
+               return s.Name
+       }
+       return fmt.Sprintf("%s<%d>", s.Name, s.Version)
+}
+
  type Reloc struct {
         Off     int32
         Siz     uint8
diff --git a/src/cmd/internal/ld/objfile.go b/src/cmd/internal/ld/objfile.go

index 3d59323dbaececbd4cb2cd2faf468ad047478198..613fcb2a40c39de19a9b273a91ba37382712dbe3 100644 (file)
--- a/src/cmd/internal/ld/objfile.go
+++ b/src/cmd/internal/ld/objfile.go
@@ -347,7 +347,7 @@ func rdsym(ctxt *Link, f *obj.Biobuf, pkg string) *LSym {
                         s.Reachable = false
                 }
         }
-       if v == 0 && strings.HasPrefix(s.Name, "runtime.gcbits.0x") {
+       if v == 0 && strings.HasPrefix(s.Name, "runtime.gcbits.") {
                 s.Local = true
         }
         return s
diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go

index 373583d4719acdf800a57f466b08494b8863cc27..9214577c2ef9bf963a58785833dca35adb6d4116 100644 (file)
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -4395,11 +4395,9 @@ type funcLayoutTest struct {
  var funcLayoutTests []funcLayoutTest
  
  func init() {
-       var argAlign = PtrSize
-       var naclExtra []byte
+       var argAlign uintptr = PtrSize
         if runtime.GOARCH == "amd64p32" {
                 argAlign = 2 * PtrSize
-               naclExtra = append(naclExtra, 0)
         }
         roundup := func(x uintptr, a uintptr) uintptr {
                 return (x + a - 1) / a * a
@@ -4413,16 +4411,14 @@ func init() {
                         4 * PtrSize,
                         4 * PtrSize,
                         []byte{1, 0, 1},
-                       []byte{1, 0, 1, 0, 1, 0},
+                       []byte{1, 0, 1, 0, 1},
                 })
  
-       var r, s []byte
+       var r []byte
         if PtrSize == 4 {
                 r = []byte{0, 0, 0, 1}
-               s = append([]byte{0, 0, 0, 1, 0}, naclExtra...)
         } else {
                 r = []byte{0, 0, 1}
-               s = []byte{0, 0, 1, 0}
         }
         funcLayoutTests = append(funcLayoutTests,
                 funcLayoutTest{
@@ -4432,7 +4428,7 @@ func init() {
                         roundup(3*4, PtrSize) + PtrSize + 2,
                         roundup(roundup(3*4, PtrSize)+PtrSize+2, argAlign),
                         r,
-                       s,
+                       r,
                 })
  
         funcLayoutTests = append(funcLayoutTests,
@@ -4469,7 +4465,7 @@ func init() {
                         3 * PtrSize,
                         roundup(3*PtrSize, argAlign),
                         []byte{1, 0, 1},
-                       append([]byte{1, 0, 1}, naclExtra...),
+                       []byte{1, 0, 1},
                 })
  
         funcLayoutTests = append(funcLayoutTests,
@@ -4480,7 +4476,7 @@ func init() {
                         PtrSize,
                         roundup(PtrSize, argAlign),
                         []byte{},
-                       append([]byte{0}, naclExtra...),
+                       []byte{},
                 })
  
         funcLayoutTests = append(funcLayoutTests,
@@ -4491,7 +4487,7 @@ func init() {
                         0,
                         0,
                         []byte{},
-                       []byte{0},
+                       []byte{},
                 })
  
         funcLayoutTests = append(funcLayoutTests,
@@ -4502,7 +4498,7 @@ func init() {
                         2 * PtrSize,
                         2 * PtrSize,
                         []byte{1},
-                       []byte{1, 0},
+                       []byte{1},
                         // Note: this one is tricky, as the receiver is not a pointer.  But we
                         // pass the receiver by reference to the autogenerated pointer-receiver
                         // version of the function.
@@ -4532,3 +4528,118 @@ func TestFuncLayout(t *testing.T) {
                 }
         }
  }
+
+func verifyGCBits(t *testing.T, typ Type, bits []byte) {
+       heapBits := GCBits(New(typ).Interface())
+       if !bytes.Equal(heapBits, bits) {
+               t.Errorf("heapBits incorrect for %v\nhave %v\nwant %v", typ, heapBits, bits)
+       }
+}
+
+func TestGCBits(t *testing.T) {
+       verifyGCBits(t, TypeOf((*byte)(nil)), []byte{1})
+
+       // Building blocks for types seen by the compiler (like [2]Xscalar).
+       // The compiler will create the type structures for the derived types,
+       // including their GC metadata.
+       type Xscalar struct{ x uintptr }
+       type Xptr struct{ x *byte }
+       type Xptrscalar struct {
+               *byte
+               uintptr
+       }
+       type Xscalarptr struct {
+               uintptr
+               *byte
+       }
+
+       var Tscalar, Tptr, Tscalarptr, Tptrscalar Type
+       {
+               // Building blocks for types constructed by reflect.
+               // This code is in a separate block so that code below
+               // cannot accidentally refer to these.
+               // The compiler must NOT see types derived from these
+               // (for example, [2]Scalar must NOT appear in the program),
+               // or else reflect will use it instead of having to construct one.
+               // The goal is to test the construction.
+               type Scalar struct{ x uintptr }
+               type Ptr struct{ x *byte }
+               type Ptrscalar struct {
+                       *byte
+                       uintptr
+               }
+               type Scalarptr struct {
+                       uintptr
+                       *byte
+               }
+               Tscalar = TypeOf(Scalar{})
+               Tptr = TypeOf(Ptr{})
+               Tscalarptr = TypeOf(Scalarptr{})
+               Tptrscalar = TypeOf(Ptrscalar{})
+       }
+
+       empty := []byte{}
+
+       verifyGCBits(t, TypeOf(Xscalar{}), empty)
+       verifyGCBits(t, Tscalar, empty)
+       verifyGCBits(t, TypeOf(Xptr{}), lit(1))
+       verifyGCBits(t, Tptr, lit(1))
+       verifyGCBits(t, TypeOf(Xscalarptr{}), lit(0, 1))
+       verifyGCBits(t, Tscalarptr, lit(0, 1))
+       verifyGCBits(t, TypeOf(Xptrscalar{}), lit(1))
+       verifyGCBits(t, Tptrscalar, lit(1))
+
+       verifyGCBits(t, TypeOf([0]Xptr{}), empty)
+       verifyGCBits(t, ArrayOf(0, Tptr), empty)
+       verifyGCBits(t, TypeOf([1]Xptrscalar{}), lit(1))
+       verifyGCBits(t, ArrayOf(1, Tptrscalar), lit(1))
+       verifyGCBits(t, TypeOf([2]Xscalar{}), empty)
+       verifyGCBits(t, ArrayOf(2, Tscalar), empty)
+       verifyGCBits(t, TypeOf([100]Xscalar{}), empty)
+       verifyGCBits(t, ArrayOf(100, Tscalar), empty)
+       verifyGCBits(t, TypeOf([2]Xptr{}), lit(1, 1))
+       verifyGCBits(t, ArrayOf(2, Tptr), lit(1, 1))
+       verifyGCBits(t, TypeOf([100]Xptr{}), rep(100, lit(1)))
+       verifyGCBits(t, ArrayOf(100, Tptr), rep(100, lit(1)))
+       verifyGCBits(t, TypeOf([2]Xscalarptr{}), lit(0, 1, 0, 1))
+       verifyGCBits(t, ArrayOf(2, Tscalarptr), lit(0, 1, 0, 1))
+       verifyGCBits(t, TypeOf([100]Xscalarptr{}), rep(100, lit(0, 1)))
+       verifyGCBits(t, ArrayOf(100, Tscalarptr), rep(100, lit(0, 1)))
+       verifyGCBits(t, TypeOf([2]Xptrscalar{}), lit(1, 0, 1))
+       verifyGCBits(t, ArrayOf(2, Tptrscalar), lit(1, 0, 1))
+       verifyGCBits(t, TypeOf([100]Xptrscalar{}), rep(100, lit(1, 0)))
+       verifyGCBits(t, ArrayOf(100, Tptrscalar), rep(100, lit(1, 0)))
+       verifyGCBits(t, TypeOf([1][100]Xptrscalar{}), rep(100, lit(1, 0)))
+       verifyGCBits(t, ArrayOf(1, ArrayOf(100, Tptrscalar)), rep(100, lit(1, 0)))
+       verifyGCBits(t, TypeOf([2][100]Xptrscalar{}), rep(200, lit(1, 0)))
+       verifyGCBits(t, ArrayOf(2, ArrayOf(100, Tptrscalar)), rep(200, lit(1, 0)))
+
+       verifyGCBits(t, TypeOf((chan [100]Xscalar)(nil)), lit(1))
+       verifyGCBits(t, ChanOf(BothDir, ArrayOf(100, Tscalar)), lit(1))
+
+       verifyGCBits(t, TypeOf((func([100]Xscalarptr))(nil)), lit(1))
+       //verifyGCBits(t, FuncOf([]Type{ArrayOf(100, Tscalarptr)}, nil, false), lit(1))
+
+       verifyGCBits(t, TypeOf((map[[100]Xscalarptr]Xscalar)(nil)), lit(1))
+       verifyGCBits(t, MapOf(ArrayOf(100, Tscalarptr), Tscalar), lit(1))
+
+       verifyGCBits(t, TypeOf((*[100]Xscalar)(nil)), lit(1))
+       verifyGCBits(t, PtrTo(ArrayOf(100, Tscalar)), lit(1))
+
+       verifyGCBits(t, TypeOf(([][100]Xscalar)(nil)), lit(1))
+       verifyGCBits(t, SliceOf(ArrayOf(100, Tscalar)), lit(1))
+
+       hdr := make([]byte, 8/PtrSize)
+       verifyGCBits(t, MapBucketOf(Tscalar, Tptr), join(hdr, rep(8, lit(0)), rep(8, lit(1)), lit(1)))
+       verifyGCBits(t, MapBucketOf(Tscalarptr, Tptr), join(hdr, rep(8, lit(0, 1)), rep(8, lit(1)), lit(1)))
+       verifyGCBits(t, MapBucketOf(Tscalar, Tscalar), empty)
+       verifyGCBits(t, MapBucketOf(ArrayOf(2, Tscalarptr), ArrayOf(3, Tptrscalar)), join(hdr, rep(8*2, lit(0, 1)), rep(8*3, lit(1, 0)), lit(1)))
+       verifyGCBits(t, MapBucketOf(ArrayOf(64/PtrSize, Tscalarptr), ArrayOf(64/PtrSize, Tptrscalar)), join(hdr, rep(8*64/PtrSize, lit(0, 1)), rep(8*64/PtrSize, lit(1, 0)), lit(1)))
+       verifyGCBits(t, MapBucketOf(ArrayOf(64/PtrSize+1, Tscalarptr), ArrayOf(64/PtrSize, Tptrscalar)), join(hdr, rep(8, lit(1)), rep(8*64/PtrSize, lit(1, 0)), lit(1)))
+       verifyGCBits(t, MapBucketOf(ArrayOf(64/PtrSize, Tscalarptr), ArrayOf(64/PtrSize+1, Tptrscalar)), join(hdr, rep(8*64/PtrSize, lit(0, 1)), rep(8, lit(1)), lit(1)))
+       verifyGCBits(t, MapBucketOf(ArrayOf(64/PtrSize+1, Tscalarptr), ArrayOf(64/PtrSize+1, Tptrscalar)), join(hdr, rep(8, lit(1)), rep(8, lit(1)), lit(1)))
+}
+
+func rep(n int, b []byte) []byte { return bytes.Repeat(b, n) }
+func join(b ...[]byte) []byte    { return bytes.Join(b, nil) }
+func lit(x ...byte) []byte       { return x }
diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go

index 6748eba3d1298f0c5c569aeb43165b1dec5a3330..a4e2e7e28c725227d67f7b494ca328fcb20029cd 100644 (file)
--- a/src/reflect/export_test.go
+++ b/src/reflect/export_test.go
@@ -4,6 +4,8 @@
  
  package reflect
  
+import "unsafe"
+
  // MakeRO returns a copy of v with the read-only flag set.
  func MakeRO(v Value) Value {
         v.flag |= flagRO
@@ -28,14 +30,14 @@ func FuncLayout(t Type, rcvr Type) (frametype Type, argSize, retOffset uintptr,
                 ft, argSize, retOffset, s, _ = funcLayout(t.(*rtype), nil)
         }
         frametype = ft
-       for i := uint32(0); i < s.n; i += 2 {
-               stack = append(stack, s.data[i/8]>>(i%8)&3)
+       for i := uint32(0); i < s.n; i++ {
+               stack = append(stack, s.data[i/8]>>(i%8)&1)
         }
         if ft.kind&kindGCProg != 0 {
                 panic("can't handle gc programs")
         }
-       gcdata := (*[1000]byte)(ft.gc[0])
-       for i := uintptr(0); i < ft.size/ptrSize; i++ {
+       gcdata := (*[1000]byte)(unsafe.Pointer(ft.gcdata))
+       for i := uintptr(0); i < ft.ptrdata/ptrSize; i++ {
                 gc = append(gc, gcdata[i/8]>>(i%8)&1)
         }
         ptrs = ft.kind&kindNoPointers == 0
@@ -51,3 +53,11 @@ func TypeLinks() []string {
         }
         return r
  }
+
+var GCBits = gcbits
+
+func gcbits(interface{}) []byte // provided by runtime
+
+func MapBucketOf(x, y Type) Type {
+       return bucketOf(x.(*rtype), y.(*rtype))
+}
diff --git a/src/reflect/type.go b/src/reflect/type.go

index bffe2595ddb5e544333c60072e9845a631045fd8..f39ba52a4262153f2a841d6f4b760cab6eaa4345 100644 (file)
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -247,17 +247,17 @@ const (
  type rtype struct {
         size          uintptr
         ptrdata       uintptr
-       hash          uint32            // hash of type; avoids computation in hash tables
-       _             uint8             // unused/padding
-       align         uint8             // alignment of variable with this type
-       fieldAlign    uint8             // alignment of struct field with this type
-       kind          uint8             // enumeration for C
-       alg           *typeAlg          // algorithm table
-       gc            [2]unsafe.Pointer // garbage collection data
-       string        *string           // string form; unnecessary but undeniably useful
-       *uncommonType                   // (relatively) uncommon fields
-       ptrToThis     *rtype            // type for pointer to this type, if used in binary or has methods
-       zero          unsafe.Pointer    // pointer to zero value
+       hash          uint32         // hash of type; avoids computation in hash tables
+       _             uint8          // unused/padding
+       align         uint8          // alignment of variable with this type
+       fieldAlign    uint8          // alignment of struct field with this type
+       kind          uint8          // enumeration for C
+       alg           *typeAlg       // algorithm table
+       gcdata        *byte          // garbage collection data
+       string        *string        // string form; unnecessary but undeniably useful
+       *uncommonType                // (relatively) uncommon fields
+       ptrToThis     *rtype         // type for pointer to this type, if used in binary or has methods
+       zero          unsafe.Pointer // pointer to zero value
  }
  
  // a copy of runtime.typeAlg
@@ -1670,111 +1670,14 @@ func isReflexive(t *rtype) bool {
         }
  }
  
-// gcProg is a helper type for generatation of GC pointer info.
-type gcProg struct {
-       gc       []byte
-       size     uintptr // size of type in bytes
-       hasPtr   bool
-       lastZero uintptr // largest offset of a zero-byte field
-}
-
-func (gc *gcProg) append(v byte) {
-       gc.align(unsafe.Sizeof(uintptr(0)))
-       gc.appendWord(v)
-}
-
-// Appends t's type info to the current program.
-func (gc *gcProg) appendProg(t *rtype) {
-       gc.align(uintptr(t.align))
-       if !t.pointers() {
-               gc.size += t.size
-               if t.size == 0 {
-                       gc.lastZero = gc.size
-               }
-               return
-       }
-       switch t.Kind() {
-       default:
-               panic("reflect: non-pointer type marked as having pointers")
-       case Ptr, UnsafePointer, Chan, Func, Map:
-               gc.appendWord(1)
-       case Slice:
-               gc.appendWord(1)
-               gc.appendWord(0)
-               gc.appendWord(0)
-       case String:
-               gc.appendWord(1)
-               gc.appendWord(0)
-       case Array:
-               c := t.Len()
-               e := t.Elem().common()
-               for i := 0; i < c; i++ {
-                       gc.appendProg(e)
-               }
-       case Interface:
-               gc.appendWord(1)
-               gc.appendWord(1)
-       case Struct:
-               oldsize := gc.size
-               c := t.NumField()
-               for i := 0; i < c; i++ {
-                       gc.appendProg(t.Field(i).Type.common())
-               }
-               if gc.size > oldsize+t.size {
-                       panic("reflect: struct components are larger than the struct itself")
-               }
-               gc.size = oldsize + t.size
-       }
-}
-
-func (gc *gcProg) appendWord(v byte) {
-       ptrsize := unsafe.Sizeof(uintptr(0))
-       if gc.size%ptrsize != 0 {
-               panic("reflect: unaligned GC program")
-       }
-       nptr := gc.size / ptrsize
-       for uintptr(len(gc.gc)) <= nptr/8 {
-               gc.gc = append(gc.gc, 0)
-       }
-       gc.gc[nptr/8] |= v << (nptr % 8)
-       gc.size += ptrsize
-       if v == 1 {
-               gc.hasPtr = true
-       }
-}
-
-func (gc *gcProg) finalize() (unsafe.Pointer, bool) {
-       if gc.size == 0 {
-               return nil, false
-       }
-       if gc.lastZero == gc.size {
-               gc.size++
-       }
-       ptrsize := unsafe.Sizeof(uintptr(0))
-       gc.align(ptrsize)
-       nptr := gc.size / ptrsize
-       for uintptr(len(gc.gc)) <= nptr/8 {
-               gc.gc = append(gc.gc, 0)
-       }
-       return unsafe.Pointer(&gc.gc[0]), gc.hasPtr
-}
-
-func extractGCWord(gc []byte, i uintptr) byte {
-       return gc[i/8] >> (i % 8) & 1
-}
-
-func (gc *gcProg) align(a uintptr) {
-       gc.size = align(gc.size, a)
-}
-
  // Make sure these routines stay in sync with ../../runtime/hashmap.go!
  // These types exist only for GC, so we only fill out GC relevant info.
  // Currently, that's just size and the GC program.  We also fill in string
  // for possible debugging use.
  const (
-       bucketSize = 8
-       maxKeySize = 128
-       maxValSize = 128
+       bucketSize uintptr = 8
+       maxKeySize uintptr = 128
+       maxValSize uintptr = 128
  )
  
  func bucketOf(ktyp, etyp *rtype) *rtype {
@@ -1791,33 +1694,70 @@ func bucketOf(ktyp, etyp *rtype) *rtype {
         if etyp.size > maxValSize {
                 etyp = PtrTo(etyp).(*rtype)
         }
-       ptrsize := unsafe.Sizeof(uintptr(0))
  
-       var gc gcProg
-       // topbits
-       for i := 0; i < int(bucketSize*unsafe.Sizeof(uint8(0))/ptrsize); i++ {
-               gc.append(0)
-       }
-       // keys
-       for i := 0; i < bucketSize; i++ {
-               gc.appendProg(ktyp)
-       }
-       // values
-       for i := 0; i < bucketSize; i++ {
-               gc.appendProg(etyp)
+       // Prepare GC data if any.
+       // A bucket is at most bucketSize*(1+maxKeySize+maxValSize)+2*ptrSize bytes,
+       // or 2072 bytes, or 259 pointer-size words, or 33 bytes of pointer bitmap.
+       // Normally the enforced limit on pointer maps is 16 bytes,
+       // but larger ones are acceptable, 33 bytes isn't too too big,
+       // and it's easier to generate a pointer bitmap than a GC program.
+       // Note that since the key and value are known to be <= 128 bytes,
+       // they're guaranteed to have bitmaps instead of GC programs.
+       var gcdata *byte
+       var ptrdata uintptr
+       if kind != kindNoPointers {
+               nptr := (bucketSize*(1+ktyp.size+etyp.size) + ptrSize) / ptrSize
+               mask := make([]byte, (nptr+7)/8)
+               base := bucketSize / ptrSize
+
+               if ktyp.kind&kindNoPointers == 0 {
+                       if ktyp.kind&kindGCProg != 0 {
+                               panic("reflect: unexpected GC program in MapOf")
+                       }
+                       kmask := (*[16]byte)(unsafe.Pointer(ktyp.gcdata))
+                       for i := uintptr(0); i < ktyp.size/ptrSize; i++ {
+                               if (kmask[i/8]>>(i%8))&1 != 0 {
+                                       for j := uintptr(0); j < bucketSize; j++ {
+                                               word := base + j*ktyp.size/ptrSize + i
+                                               mask[word/8] |= 1 << (word % 8)
+                                       }
+                               }
+                       }
+               }
+               base += bucketSize * ktyp.size / ptrSize
+
+               if etyp.kind&kindNoPointers == 0 {
+                       if etyp.kind&kindGCProg != 0 {
+                               panic("reflect: unexpected GC program in MapOf")
+                       }
+                       emask := (*[16]byte)(unsafe.Pointer(etyp.gcdata))
+                       for i := uintptr(0); i < etyp.size/ptrSize; i++ {
+                               if (emask[i/8]>>(i%8))&1 != 0 {
+                                       for j := uintptr(0); j < bucketSize; j++ {
+                                               word := base + j*etyp.size/ptrSize + i
+                                               mask[word/8] |= 1 << (word % 8)
+                                       }
+                               }
+                       }
+               }
+               base += bucketSize * etyp.size / ptrSize
+
+               word := base
+               mask[word/8] |= 1 << (word % 8)
+               gcdata = &mask[0]
+               ptrdata = (word + 1) * ptrSize
         }
-       // overflow
-       gc.append(1)
-       ptrdata := gc.size
+
+       size := bucketSize*(1+ktyp.size+etyp.size) + ptrSize
         if runtime.GOARCH == "amd64p32" {
-               gc.append(0)
+               size += ptrSize
         }
  
         b := new(rtype)
-       b.size = gc.size
+       b.size = size
         b.ptrdata = ptrdata
         b.kind = kind
-       b.gc[0], _ = gc.finalize()
+       b.gcdata = gcdata
         s := "bucket(" + *ktyp.string + "," + *etyp.string + ")"
         b.string = &s
         return b
@@ -1911,19 +1851,83 @@ func ArrayOf(count int, elem Type) Type {
         array.len = uintptr(count)
         array.slice = slice.(*rtype)
  
-       var gc gcProg
-       // TODO(sbinet): count could be possibly very large.
-       // use insArray directives from ../runtime/mbitmap.go.
-       for i := 0; i < count; i++ {
-               gc.appendProg(typ)
-       }
-
-       var hasPtr bool
-       array.gc[0], hasPtr = gc.finalize()
-       if !hasPtr {
+       array.kind &^= kindNoPointers
+       switch {
+       case typ.kind&kindNoPointers != 0 || array.size == 0:
+               // No pointers.
                 array.kind |= kindNoPointers
-       } else {
-               array.kind &^= kindNoPointers
+               array.gcdata = nil
+               array.ptrdata = 0
+
+       case count == 1:
+               // In memory, 1-element array looks just like the element.
+               array.kind |= typ.kind & kindGCProg
+               array.gcdata = typ.gcdata
+               array.ptrdata = typ.ptrdata
+
+       case typ.kind&kindGCProg == 0 && array.size <= 16*8*ptrSize:
+               // Element is small with pointer mask; array is still small.
+               // Create direct pointer mask by turning each 1 bit in elem
+               // into count 1 bits in larger mask.
+               mask := make([]byte, (array.ptrdata/ptrSize+7)/8)
+               elemMask := (*[1 << 30]byte)(unsafe.Pointer(typ.gcdata))[:]
+               elemWords := typ.size / ptrSize
+               for j := uintptr(0); j < typ.ptrdata/ptrSize; j++ {
+                       if (elemMask[j/8]>>(j%8))&1 != 0 {
+                               for i := uintptr(0); i < array.len; i++ {
+                                       k := i*elemWords + j
+                                       mask[k/8] |= 1 << (k % 8)
+                               }
+                       }
+               }
+               array.gcdata = &mask[0]
+
+       default:
+               // Create program that emits one element
+               // and then repeats to make the array.
+               prog := []byte{0, 0, 0, 0} // will be length of prog
+               elemGC := (*[1 << 30]byte)(unsafe.Pointer(typ.gcdata))[:]
+               elemPtrs := typ.ptrdata / ptrSize
+               if typ.kind&kindGCProg == 0 {
+                       // Element is small with pointer mask; use as literal bits.
+                       mask := elemGC
+                       // Emit 120-bit chunks of full bytes (max is 127 but we avoid using partial bytes).
+                       var n uintptr
+                       for n = elemPtrs; n > 120; n -= 120 {
+                               prog = append(prog, 120)
+                               prog = append(prog, mask[:15]...)
+                               mask = mask[15:]
+                       }
+                       prog = append(prog, byte(n))
+                       prog = append(prog, mask[:(n+7)/8]...)
+               } else {
+                       // Element has GC program; emit one element.
+                       elemProg := elemGC[4 : 4+*(*uint32)(unsafe.Pointer(&elemGC[0]))-1]
+                       prog = append(prog, elemProg...)
+               }
+               // Pad from ptrdata to size.
+               elemWords := typ.size / ptrSize
+               if elemPtrs < elemWords {
+                       // Emit literal 0 bit, then repeat as needed.
+                       prog = append(prog, 0x01, 0x00)
+                       if elemPtrs+1 < elemWords {
+                               prog = append(prog, 0x81)
+                               prog = appendVarint(prog, elemWords-elemPtrs-1)
+                       }
+               }
+               // Repeat count-1 times.
+               if elemWords < 0x80 {
+                       prog = append(prog, byte(elemWords|0x80))
+               } else {
+                       prog = append(prog, 0x80)
+                       prog = appendVarint(prog, elemWords)
+               }
+               prog = appendVarint(prog, uintptr(count)-1)
+               prog = append(prog, 0)
+               *(*uint32)(unsafe.Pointer(&prog[0])) = uint32(len(prog) - 4)
+               array.kind |= kindGCProg
+               array.gcdata = &prog[0]
+               array.ptrdata = array.size // overestimate but ok; must match program
         }
  
         etyp := typ.common()
@@ -1967,6 +1971,14 @@ func ArrayOf(count int, elem Type) Type {
         return cachePut(ckey, &array.rtype)
  }
  
+func appendVarint(x []byte, v uintptr) []byte {
+       for ; v >= 0x80; v >>= 7 {
+               x = append(x, byte(v|0x80))
+       }
+       x = append(x, byte(v))
+       return x
+}
+
  // toType converts from a *rtype to a Type that can be returned
  // to the client of package reflect. In gc, the only concern is that
  // a nil *rtype must be replaced by a nil Type, but in gccgo this
@@ -2003,7 +2015,7 @@ var layoutCache struct {
  // The returned type exists only for GC, so we only fill out GC relevant info.
  // Currently, that's just size and the GC program.  We also fill in
  // the name for possible debugging use.
-func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uintptr, stack *bitVector, framePool *sync.Pool) {
+func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uintptr, stk *bitVector, framePool *sync.Pool) {
         if t.Kind() != Func {
                 panic("reflect: funcLayout of non-func type")
         }
@@ -2026,53 +2038,47 @@ func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uin
         tt := (*funcType)(unsafe.Pointer(t))
  
         // compute gc program & stack bitmap for arguments
-       stack = new(bitVector)
-       var gc gcProg
+       ptrmap := new(bitVector)
         var offset uintptr
         if rcvr != nil {
                 // Reflect uses the "interface" calling convention for
                 // methods, where receivers take one word of argument
                 // space no matter how big they actually are.
-               if ifaceIndir(rcvr) {
-                       // we pass a pointer to the receiver.
-                       gc.append(1)
-                       stack.append2(1)
-               } else if rcvr.pointers() {
-                       // rcvr is a one-word pointer object.  Its gc program
-                       // is just what we need here.
-                       gc.append(1)
-                       stack.append2(1)
-               } else {
-                       gc.append(0)
-                       stack.append2(0)
+               if ifaceIndir(rcvr) || rcvr.pointers() {
+                       ptrmap.append(1)
                 }
                 offset += ptrSize
         }
         for _, arg := range tt.in {
-               gc.appendProg(arg)
-               addTypeBits(stack, &offset, arg)
+               offset += -offset & uintptr(arg.align-1)
+               addTypeBits(ptrmap, offset, arg)
+               offset += arg.size
         }
-       argSize = gc.size
+       argN := ptrmap.n
+       argSize = offset
         if runtime.GOARCH == "amd64p32" {
-               gc.align(8)
+               offset += -offset & (8 - 1)
         }
-       gc.align(ptrSize)
-       retOffset = gc.size
+       offset += -offset & (ptrSize - 1)
+       retOffset = offset
         for _, res := range tt.out {
-               gc.appendProg(res)
-               // stack map does not need result bits
+               offset += -offset & uintptr(res.align-1)
+               addTypeBits(ptrmap, offset, res)
+               offset += res.size
         }
-       gc.align(ptrSize)
+       offset += -offset & (ptrSize - 1)
  
         // build dummy rtype holding gc program
         x := new(rtype)
-       x.size = gc.size
-       x.ptrdata = gc.size // over-approximation
-       var hasPtr bool
-       x.gc[0], hasPtr = gc.finalize()
-       if !hasPtr {
+       x.size = offset
+       x.ptrdata = uintptr(ptrmap.n) * ptrSize
+       if ptrmap.n > 0 {
+               x.gcdata = &ptrmap.data[0]
+       } else {
                 x.kind |= kindNoPointers
         }
+       ptrmap.n = argN
+
         var s string
         if rcvr != nil {
                 s = "methodargs(" + *rcvr.string + ")(" + *t.string + ")"
@@ -2092,11 +2098,11 @@ func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uin
                 t:         x,
                 argSize:   argSize,
                 retOffset: retOffset,
-               stack:     stack,
+               stack:     ptrmap,
                 framePool: framePool,
         }
         layoutCache.Unlock()
-       return x, argSize, retOffset, stack, framePool
+       return x, argSize, retOffset, ptrmap, framePool
  }
  
  // ifaceIndir reports whether t is stored indirectly in an interface value.
@@ -2110,56 +2116,49 @@ type bitVector struct {
         data []byte
  }
  
-// append a bit pair to the bitmap.
-func (bv *bitVector) append2(bits uint8) {
-       // assume bv.n is a multiple of 2, since append2 is the only operation.
+// append a bit to the bitmap.
+func (bv *bitVector) append(bit uint8) {
         if bv.n%8 == 0 {
                 bv.data = append(bv.data, 0)
         }
-       bv.data[bv.n/8] |= bits << (bv.n % 8)
-       bv.n += 2
+       bv.data[bv.n/8] |= bit << (bv.n % 8)
+       bv.n++
  }
  
-func addTypeBits(bv *bitVector, offset *uintptr, t *rtype) {
-       *offset = align(*offset, uintptr(t.align))
-       if !t.pointers() {
-               *offset += t.size
+func addTypeBits(bv *bitVector, offset uintptr, t *rtype) {
+       if t.kind&kindNoPointers != 0 {
                 return
         }
  
         switch Kind(t.kind & kindMask) {
         case Chan, Func, Map, Ptr, Slice, String, UnsafePointer:
                 // 1 pointer at start of representation
-               for bv.n < 2*uint32(*offset/uintptr(ptrSize)) {
-                       bv.append2(0)
+               for bv.n < uint32(offset/uintptr(ptrSize)) {
+                       bv.append(0)
                 }
-               bv.append2(1)
+               bv.append(1)
  
         case Interface:
                 // 2 pointers
-               for bv.n < 2*uint32(*offset/uintptr(ptrSize)) {
-                       bv.append2(0)
+               for bv.n < uint32(offset/uintptr(ptrSize)) {
+                       bv.append(0)
                 }
-               bv.append2(1)
-               bv.append2(1)
+               bv.append(1)
+               bv.append(1)
  
         case Array:
                 // repeat inner type
                 tt := (*arrayType)(unsafe.Pointer(t))
                 for i := 0; i < int(tt.len); i++ {
-                       addTypeBits(bv, offset, tt.elem)
+                       addTypeBits(bv, offset+uintptr(i)*tt.elem.size, tt.elem)
                 }
  
         case Struct:
                 // apply fields
                 tt := (*structType)(unsafe.Pointer(t))
-               start := *offset
                 for i := range tt.fields {
                         f := &tt.fields[i]
-                       off := start + f.offset
-                       addTypeBits(bv, &off, f.typ)
+                       addTypeBits(bv, offset+f.offset, f.typ)
                 }
         }
-
-       *offset += t.size
  }
diff --git a/src/reflect/value.go b/src/reflect/value.go

index fb9e85a8cf3446031318bce7d7b7a3c53e410388..91c38c9ffc1e27759ed61c956965e676a5f40b6d 100644 (file)
--- a/src/reflect/value.go
+++ b/src/reflect/value.go
@@ -10,7 +10,7 @@ import (
         "unsafe"
  )
  
-const ptrSize = unsafe.Sizeof((*byte)(nil))
+const ptrSize = 4 << (^uintptr(0) >> 63) // unsafe.Sizeof(uintptr(0)) but an ideal const
  const cannotSet = "cannot set value obtained from unexported struct field"
  
  // Value is the reflection interface to a Go value.
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go

index 546c331614ac0fec7837312b6f6eecd6cbd2756f..39bb4217b34571fc219467aa6b99b3cfdf39ffd4 100644 (file)
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -547,8 +547,6 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
         }
  }
  
-// TODO(rsc): Clean up the next two functions.
-
  // heapBitsSetType records that the new allocation [x, x+size)
  // holds in [x, x+dataSize) one or more values of type typ.
  // (The number of values is given by dataSize / typ.size.)
@@ -569,11 +567,7 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
  // but if the start or end of x shares a bitmap byte with an adjacent
  // object, the GC marker is racing with updates to those object's mark bits.
  func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
-       const doubleCheck = false // slow but helpful; enable to test modifications to this function
-
-       // From here till marked label marking the object as allocated
-       // and storing type info in the GC bitmap.
-       h := heapBitsForAddr(x)
+       const doubleCheck = false // slow but helpful; enable to test modifications to this code
  
         // dataSize is always size rounded up to the next malloc size class,
         // except in the case of allocating a defer block, in which case
@@ -593,6 +587,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 // (non-pointers are aggregated into tinySize allocations),
                 // initSpan sets the pointer bits for us. Nothing to do here.
                 if doubleCheck {
+                       h := heapBitsForAddr(x)
                         if !h.isPointer() {
                                 throw("heapBitsSetType: pointer bit missing")
                         }
@@ -600,33 +595,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 return
         }
  
-       ptrmask := (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
-       if typ.kind&kindGCProg != 0 {
-               nptr := typ.ptrdata / ptrSize
-               masksize := (nptr + 7) / 8
-               masksize++ // unroll flag in the beginning
-               if masksize > maxGCMask && typ.gc[1] != 0 {
-                       // write barriers have not been updated to deal with this case yet.
-                       throw("maxGCMask too small for now")
-                       // If the mask is too large, unroll the program directly
-                       // into the GC bitmap. It's 7 times slower than copying
-                       // from the pre-unrolled mask, but saves 1/16 of type size
-                       // memory for the mask.
-                       systemstack(func() {
-                               unrollgcproginplace_m(unsafe.Pointer(x), typ, size, dataSize)
-                       })
-                       return
-               }
-               // Check whether the program is already unrolled
-               // by checking if the unroll flag byte is set
-               maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
-               if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
-                       systemstack(func() {
-                               unrollgcprog_m(typ)
-                       })
-               }
-               ptrmask = add1(ptrmask) // skip the unroll flag byte
-       }
+       h := heapBitsForAddr(x)
+       ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
  
         // Heap bitmap bits for 2-word object are only 4 bits,
         // so also shared with objects next to it; use atomic updates.
@@ -661,6 +631,12 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         return
                 }
                 // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+               if doubleCheck {
+                       if typ.size != 2*ptrSize || typ.kind&kindGCProg != 0 {
+                               print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
+                               throw("heapBitsSetType")
+                       }
+               }
                 b := uint32(*ptrmask)
                 hb := b & 3
                 if gcphase == _GCoff {
@@ -678,16 +654,49 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
         // This is a lot of lines of code, but it compiles into relatively few
         // machine instructions.
  
-       // Ptrmask buffer.
         var (
+               // Ptrmask input.
                 p     *byte   // last ptrmask byte read
                 b     uintptr // ptrmask bits already loaded
                 nb    uintptr // number of bits in b at next read
                 endp  *byte   // final ptrmask byte to read (then repeat)
                 endnb uintptr // number of valid bits in *endp
                 pbits uintptr // alternate source of bits
+
+               // Heap bitmap output.
+               w     uintptr // words processed
+               nw    uintptr // number of words to process
+               hbitp *byte   // next heap bitmap byte to write
+               hb    uintptr // bits being prepared for *hbitp
         )
  
+       hbitp = h.bitp
+
+       // Handle GC program. Delayed until this part of the code
+       // so that we can use the same double-checking mechanism
+       // as the 1-bit case. Nothing above could have encountered
+       // GC programs: the cases were all too small.
+       if typ.kind&kindGCProg != 0 {
+               heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
+               if doubleCheck {
+                       // Double-check the heap bits written by GC program
+                       // by running the GC program to create a 1-bit pointer mask
+                       // and then jumping to the double-check code below.
+                       // This doesn't catch bugs shared between the 1-bit and 4-bit
+                       // GC program execution, but it does catch mistakes specific
+                       // to just one of those and bugs in heapBitsSetTypeGCProg's
+                       // implementation of arrays.
+                       lock(&debugPtrmask.lock)
+                       if debugPtrmask.data == nil {
+                               debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
+                       }
+                       ptrmask = debugPtrmask.data
+                       runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
+                       goto Phase4
+               }
+               return
+       }
+
         // Note about sizes:
         //
         // typ.size is the number of words in the object,
@@ -780,8 +789,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 nb = 8
         }
  
-       var w uintptr  // words processed
-       var nw uintptr // number of words to process
         if typ.size == dataSize {
                 // Single entry: can stop once we reach the non-pointer data.
                 nw = typ.ptrdata / ptrSize
@@ -803,9 +810,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 nw = 2
         }
  
-       hbitp := h.bitp // next heap bitmap byte to write
-       var hb uintptr  // bits being preapred for *h.bitp
-
         // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
         // The leading byte is special because it contains the bits for words 0 and 1,
         // which do not have the marked bits set.
@@ -967,10 +971,11 @@ Phase3:
                 }
         }
  
+Phase4:
         // Phase 4: all done, but perhaps double check.
         if doubleCheck {
                 end := heapBitsForAddr(x + size)
-               if hbitp != end.bitp || (w == nw+2) != (end.shift == 2) {
+               if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
                         println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size)
                         print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
                         print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
@@ -986,15 +991,16 @@ Phase3:
                 nptr := typ.ptrdata / ptrSize
                 ndata := typ.size / ptrSize
                 count := dataSize / typ.size
-               for i := uintptr(0); i <= dataSize/ptrSize; i++ {
+               totalptr := ((count-1)*typ.size + typ.ptrdata) / ptrSize
+               for i := uintptr(0); i < size/ptrSize; i++ {
                         j := i % ndata
                         var have, want uint8
-                       if i == dataSize/ptrSize && dataSize >= size {
-                               break
-                       }
                         have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
-                       if i == dataSize/ptrSize || i/ndata == count-1 && j >= nptr {
-                               want = 0 // dead marker
+                       if i >= totalptr {
+                               want = 0 // deadmarker
+                               if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
+                                       want = bitMarked
+                               }
                         } else {
                                 if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
                                         want |= bitPointer
@@ -1008,177 +1014,483 @@ Phase3:
                         if have != want {
                                 println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
                                 print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
+                               print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
                                 print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
                                 h0 := heapBitsForAddr(x)
                                 print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
                                 print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
                                 print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
                                 println("at word", i, "offset", i*ptrSize, "have", have, "want", want)
+                               if typ.kind&kindGCProg != 0 {
+                                       println("GC program:")
+                                       dumpGCProg(addb(typ.gcdata, 4))
+                               }
                                 throw("bad heapBitsSetType")
                         }
                         h = h.next()
                 }
+               if ptrmask == debugPtrmask.data {
+                       unlock(&debugPtrmask.lock)
+               }
         }
  }
  
-// GC type info programs
+var debugPtrmask struct {
+       lock mutex
+       data *byte
+}
+
+// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
+// progSize is the size of the memory described by the program.
+// elemSize is the size of the element that the GC program describes (a prefix of).
+// dataSize is the total size of the intended data, a multiple of elemSize.
+// allocSize is the total size of the allocated memory.
  //
-// TODO(rsc): Clean up and enable.
+// GC programs are only used for large allocations.
+// heapBitsSetType requires that allocSize is a multiple of 4 words,
+// so that the relevant bitmap bytes are not shared with surrounding
+// objects and need not be accessed with atomic instructions.
+func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
+       if ptrSize == 8 && allocSize%(4*ptrSize) != 0 {
+               // Alignment will be wrong.
+               throw("heapBitsSetTypeGCProg: small allocation")
+       }
+       var totalBits uintptr
+       if elemSize == dataSize {
+               totalBits = runGCProg(prog, nil, h.bitp, 2)
+               if totalBits*ptrSize != progSize {
+                       println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
+                       throw("heapBitsSetTypeGCProg: unexpected bit count")
+               }
+       } else {
+               count := dataSize / elemSize
+
+               // Piece together program trailer to run after prog that does:
+               //      literal(0)
+               //      repeat(1, elemSize-progSize-1) // zeros to fill element size
+               //      repeat(elemSize, count-1) // repeat that element for count
+               // This zero-pads the data remaining in the first element and then
+               // repeats that first element to fill the array.
+               var trailer [40]byte // 3 varints (max 10 each) + some bytes
+               i := 0
+               if n := elemSize/ptrSize - progSize/ptrSize; n > 0 {
+                       // literal(0)
+                       trailer[i] = 0x01
+                       i++
+                       trailer[i] = 0
+                       i++
+                       if n > 1 {
+                               // repeat(1, n-1)
+                               trailer[i] = 0x81
+                               i++
+                               n--
+                               for ; n >= 0x80; n >>= 7 {
+                                       trailer[i] = byte(n | 0x80)
+                                       i++
+                               }
+                               trailer[i] = byte(n)
+                               i++
+                       }
+               }
+               // repeat(elemSize/ptrSize, count-1)
+               trailer[i] = 0x80
+               i++
+               n := elemSize / ptrSize
+               for ; n >= 0x80; n >>= 7 {
+                       trailer[i] = byte(n | 0x80)
+                       i++
+               }
+               trailer[i] = byte(n)
+               i++
+               n = count
+               for ; n >= 0x80; n >>= 7 {
+                       trailer[i] = byte(n | 0x80)
+                       i++
+               }
+               trailer[i] = byte(n)
+               i++
+               trailer[i] = 0
+               i++
+
+               runGCProg(prog, &trailer[0], h.bitp, 2)
+
+               // Even though we filled in the full array just now,
+               // record that we only filled in up to the ptrdata of the
+               // last element. This will cause the code below to
+               // memclr the dead section of the final array element,
+               // so that scanobject can stop early in the final element.
+               totalBits = (elemSize*(count-1) + progSize) / ptrSize
+       }
+       endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
+       endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
+       memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
+}
  
-const (
-       // GC type info programs.
-       // The programs allow to store type info required for GC in a compact form.
-       // Most importantly arrays take O(1) space instead of O(n).
-       // The program grammar is:
-       //
-       // Program = {Block} "insEnd"
-       // Block = Data | Array
-       // Data = "insData" DataSize DataBlock
-       // DataSize = int // size of the DataBlock in bit pairs, 1 byte
-       // DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes
-       // Array = "insArray" ArrayLen Block "insArrayEnd"
-       // ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
-       //
-       // Each instruction (insData, insArray, etc) is 1 byte.
-       // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
-       // the program looks as:
-       //
-       // insData 3 (typePointer typeScalar typeScalar)
-       //      insArray 20 insData 2 (typeScalar typePointer) insArrayEnd insEnd
-       //
-       // Total size of the program is 17 bytes (13 bytes on 32-bits).
-       // The corresponding GC mask would take 43 bytes (it would be repeated
-       // because the type has odd number of words).
-       insData = 1 + iota
-       insArray
-       insArrayEnd
-       insEnd
-
-       // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
-       maxGCMask = 65536 // TODO(rsc): change back to 64
-)
+// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
+// size the size of the region described by prog, in bytes.
+// The resulting bitvector will have no more than size/ptrSize bits.
+func progToPointerMask(prog *byte, size uintptr) bitvector {
+       n := (size/ptrSize + 7) / 8
+       x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
+       x[len(x)-1] = 0xa1 // overflow check sentinel
+       n = runGCProg(prog, nil, &x[0], 1)
+       if x[len(x)-1] != 0xa1 {
+               throw("progToPointerMask: overflow")
+       }
+       return bitvector{int32(n), &x[0]}
+}
  
-// Recursively unrolls GC program in prog.
-// mask is where to store the result.
-// If inplace is true, store the result not in mask but in the heap bitmap for mask.
-// ppos is a pointer to position in mask, in bits.
-// sparse says to generate 4-bits per word mask for heap (1-bit for data/bss otherwise).
-//go:nowritebarrier
-func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace bool) *byte {
-       pos := *ppos
-       mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
+// Packed GC pointer bitmaps, aka GC programs.
+//
+// For large types containing arrays, the type information has a
+// natural repetition that can be encoded to save space in the
+// binary and in the memory representation of the type information.
+//
+// The encoding is a simple Lempel-Ziv style bytecode machine
+// with the following instructions:
+//
+//     00000000: stop
+//     0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
+//     10000000 n c: repeat the previous n bits c times; n, c are varints
+//     1nnnnnnn c: repeat the previous n bits c times; c is a varint
+
+// runGCProg executes the GC program prog, and then trailer if non-nil,
+// writing to dst with entries of the given size.
+// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
+// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
+// starting at dst (because the heap bitmap does). In this case, the caller guarantees
+// that only whole bytes in dst need to be written.
+//
+// runGCProg returns the number of 1- or 2-bit entries written to memory.
+func runGCProg(prog, trailer, dst *byte, size int) uintptr {
+       dstStart := dst
+
+       // Bits waiting to be written to memory.
+       var bits uintptr
+       var nbits uintptr
+
+       p := prog
+Run:
         for {
-               switch *prog {
-               default:
-                       throw("unrollgcprog: unknown instruction")
-
-               case insData:
-                       prog = add1(prog)
-                       siz := int(*prog)
-                       prog = add1(prog)
-                       p := (*[1 << 30]byte)(unsafe.Pointer(prog))
-                       for i := 0; i < siz; i++ {
-                               v := p[i/8] >> (uint(i) % 8) & 1
-                               if inplace {
-                                       throw("gc inplace")
-                                       const typeShift = 2
-                                       // Store directly into GC bitmap.
-                                       h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos])))
-                                       if h.shift == 0 {
-                                               *h.bitp = v << typeShift
-                                       } else {
-                                               *h.bitp |= v << (4 + typeShift)
-                                       }
-                                       pos += ptrSize
+               // Flush accumulated full bytes.
+               // The rest of the loop assumes that nbits <= 7.
+               for ; nbits >= 8; nbits -= 8 {
+                       if size == 1 {
+                               *dst = uint8(bits)
+                               dst = add1(dst)
+                               bits >>= 8
+                       } else {
+                               v := bits&bitPointerAll | bitMarkedAll
+                               *dst = uint8(v)
+                               dst = subtract1(dst)
+                               bits >>= 4
+                               v = bits&bitPointerAll | bitMarkedAll
+                               *dst = uint8(v)
+                               dst = subtract1(dst)
+                               bits >>= 4
+                       }
+               }
+
+               // Process one instruction.
+               inst := uintptr(*p)
+               p = add1(p)
+               n := inst & 0x7F
+               if inst&0x80 == 0 {
+                       // Literal bits; n == 0 means end of program.
+                       if n == 0 {
+                               // Program is over; continue in trailer if present.
+                               if trailer != nil {
+                                       //println("trailer")
+                                       p = trailer
+                                       trailer = nil
+                                       continue
+                               }
+                               //println("done")
+                               break Run
+                       }
+                       //println("lit", n, dst)
+                       nbyte := n / 8
+                       for i := uintptr(0); i < nbyte; i++ {
+                               bits |= uintptr(*p) << nbits
+                               p = add1(p)
+                               if size == 1 {
+                                       *dst = uint8(bits)
+                                       dst = add1(dst)
+                                       bits >>= 8
                                 } else {
-                                       // 1 bit per word, for data/bss bitmap
-                                       mask[pos/8] |= v << (pos % 8)
-                                       pos++
+                                       v := bits&0xf | bitMarkedAll
+                                       *dst = uint8(v)
+                                       dst = subtract1(dst)
+                                       bits >>= 4
+                                       v = bits&0xf | bitMarkedAll
+                                       *dst = uint8(v)
+                                       dst = subtract1(dst)
+                                       bits >>= 4
+                               }
+                       }
+                       if n %= 8; n > 0 {
+                               bits |= uintptr(*p) << nbits
+                               p = add1(p)
+                               nbits += n
+                       }
+                       continue Run
+               }
+
+               // Repeat. If n == 0, it is encoded in a varint in the next bytes.
+               if n == 0 {
+                       for off := uint(0); ; off += 7 {
+                               x := uintptr(*p)
+                               p = add1(p)
+                               n |= (x & 0x7F) << off
+                               if x&0x80 == 0 {
+                                       break
+                               }
+                       }
+               }
+
+               // Count is encoded in a varint in the next bytes.
+               c := uintptr(0)
+               for off := uint(0); ; off += 7 {
+                       x := uintptr(*p)
+                       p = add1(p)
+                       c |= (x & 0x7F) << off
+                       if x&0x80 == 0 {
+                               break
+                       }
+               }
+               c *= n // now total number of bits to copy
+
+               // If the number of bits being repeated is small, load them
+               // into a register and use that register for the entire loop
+               // instead of repeatedly reading from memory.
+               // Handling fewer than 8 bits here makes the general loop simpler.
+               // The cutoff is ptrSize*8 - 7 to guarantee that when we add
+               // the pattern to a bit buffer holding at most 7 bits (a partial byte)
+               // it will not overflow.
+               src := dst
+               const maxBits = ptrSize*8 - 7
+               if n <= maxBits {
+                       // Start with bits in output buffer.
+                       pattern := bits
+                       npattern := nbits
+
+                       // If we need more bits, fetch them from memory.
+                       if size == 1 {
+                               src = subtract1(src)
+                               for npattern < n {
+                                       pattern <<= 8
+                                       pattern |= uintptr(*src)
+                                       src = subtract1(src)
+                                       npattern += 8
+                               }
+                       } else {
+                               src = add1(src)
+                               for npattern < n {
+                                       pattern <<= 4
+                                       pattern |= uintptr(*src) & 0xf
+                                       src = add1(src)
+                                       npattern += 4
                                 }
                         }
-                       prog = addb(prog, (uintptr(siz)+7)/8)
  
-               case insArray:
-                       prog = (*byte)(add(unsafe.Pointer(prog), 1))
-                       siz := uintptr(0)
-                       for i := uintptr(0); i < ptrSize; i++ {
-                               siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
+                       // We started with the whole bit output buffer,
+                       // and then we loaded bits from whole bytes.
+                       // Either way, we might now have too many instead of too few.
+                       // Discard the extra.
+                       if npattern > n {
+                               pattern >>= npattern - n
+                               npattern = n
                         }
-                       prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
-                       var prog1 *byte
-                       for i := uintptr(0); i < siz; i++ {
-                               prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace)
+
+                       // Replicate pattern to at most maxBits.
+                       if npattern == 1 {
+                               // One bit being repeated.
+                               // If the bit is 1, make the pattern all 1s.
+                               // If the bit is 0, the pattern is already all 0s,
+                               // but we can claim that the number of bits
+                               // in the word is equal to the number we need (c),
+                               // because right shift of bits will zero fill.
+                               if pattern == 1 {
+                                       pattern = 1<<maxBits - 1
+                                       npattern = maxBits
+                               } else {
+                                       npattern = c
+                               }
+                       } else {
+                               b := pattern
+                               nb := npattern
+                               if nb+nb <= maxBits {
+                                       // Double pattern until the whole uintptr is filled.
+                                       for nb <= ptrSize*8 {
+                                               b |= b << nb
+                                               nb += nb
+                                       }
+                                       // Trim away incomplete copy of original pattern in high bits.
+                                       // TODO(rsc): Replace with table lookup or loop on systems without divide?
+                                       nb = maxBits / npattern * npattern
+                                       b &= 1<<nb - 1
+                                       pattern = b
+                                       npattern = nb
+                               }
                         }
-                       if *prog1 != insArrayEnd {
-                               throw("unrollgcprog: array does not end with insArrayEnd")
+
+                       // Add pattern to bit buffer and flush bit buffer, c/npattern times.
+                       // Since pattern contains >8 bits, there will be full bytes to flush
+                       // on each iteration.
+                       for ; c >= npattern; c -= npattern {
+                               bits |= pattern << nbits
+                               nbits += npattern
+                               if size == 1 {
+                                       for nbits >= 8 {
+                                               *dst = uint8(bits)
+                                               dst = add1(dst)
+                                               bits >>= 8
+                                               nbits -= 8
+                                       }
+                               } else {
+                                       for nbits >= 4 {
+                                               *dst = uint8(bits&0xf | bitMarkedAll)
+                                               dst = subtract1(dst)
+                                               bits >>= 4
+                                               nbits -= 4
+                                       }
+                               }
                         }
-                       prog = (*byte)(add(unsafe.Pointer(prog1), 1))
  
-               case insArrayEnd, insEnd:
-                       *ppos = pos
-                       return prog
+                       // Add final fragment to bit buffer.
+                       if c > 0 {
+                               pattern &= 1<<c - 1
+                               bits |= pattern << nbits
+                               nbits += c
+                       }
+                       continue Run
                 }
-       }
-}
-
-// Unrolls GC program prog for data/bss, returns 1-bit GC mask.
-func unrollglobgcprog(prog *byte, size uintptr) bitvector {
-       masksize := round(round(size, ptrSize)/ptrSize, 8) / 8
-       mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
-       mask[masksize] = 0xa1
-       pos := uintptr(0)
-       prog = unrollgcprog1(&mask[0], prog, &pos, false)
-       if pos != size/ptrSize {
-               print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n")
-               throw("unrollglobgcprog: bad program size")
-       }
-       if *prog != insEnd {
-               throw("unrollglobgcprog: program does not end with insEnd")
-       }
-       if mask[masksize] != 0xa1 {
-               throw("unrollglobgcprog: overflow")
-       }
-       return bitvector{int32(masksize * 8), &mask[0]}
-}
  
-func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
-       throw("unrollinplace")
-       // TODO(rsc): Update for 1-bit bitmaps.
-       // TODO(rsc): Explain why these non-atomic updates are okay.
-       pos := uintptr(0)
-       prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
-       for pos != size0 {
-               unrollgcprog1((*byte)(v), prog, &pos, true)
+               // Repeat; n too large to fit in a register.
+               // Since nbits <= 7, we know the first few bytes of repeated data
+               // are already written to memory.
+               off := n - nbits // n > nbits because n > maxBits and nbits <= 7
+               if size == 1 {
+                       // Leading src fragment.
+                       src = subtractb(src, (off+7)/8)
+                       if frag := off & 7; frag != 0 {
+                               bits |= uintptr(*src) >> (8 - frag) << nbits
+                               src = add1(src)
+                               nbits += frag
+                               c -= frag
+                       }
+                       // Main loop: load one byte, write another.
+                       // The bits are rotating through the bit buffer.
+                       for i := c / 8; i > 0; i-- {
+                               bits |= uintptr(*src) << nbits
+                               src = add1(src)
+                               *dst = uint8(bits)
+                               dst = add1(dst)
+                               bits >>= 8
+                       }
+                       // Final src fragment.
+                       if c %= 8; c > 0 {
+                               bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+                               nbits += c
+                       }
+               } else {
+                       // Leading src fragment.
+                       src = addb(src, (off+3)/4)
+                       if frag := off & 3; frag != 0 {
+                               bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
+                               src = subtract1(src)
+                               nbits += frag
+                               c -= frag
+                       }
+                       // Main loop: load one byte, write another.
+                       // The bits are rotating through the bit buffer.
+                       for i := c / 4; i > 0; i-- {
+                               bits |= (uintptr(*src) & 0xf) << nbits
+                               src = subtract1(src)
+                               *dst = uint8(bits&0xf | bitMarkedAll)
+                               dst = subtract1(dst)
+                               bits >>= 4
+                       }
+                       // Final src fragment.
+                       if c %= 4; c > 0 {
+                               bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+                               nbits += c
+                       }
+               }
         }
  
-       // Mark first word as bitAllocated.
-       // Mark word after last as typeDead.
-       if size0 < size {
-               h := heapBitsForAddr(uintptr(v) + size0)
-               const typeMask = 0
-               const typeShift = 0
-               *h.bitp &^= typeMask << typeShift
+       // Write any final bits out, using full-byte writes, even for the final byte.
+       var totalBits uintptr
+       if size == 1 {
+               totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
+               nbits += -nbits & 7
+               for ; nbits > 0; nbits -= 8 {
+                       *dst = uint8(bits)
+                       dst = add1(dst)
+                       bits >>= 8
+               }
+       } else {
+               totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
+               nbits += -nbits & 3
+               for ; nbits > 0; nbits -= 4 {
+                       v := bits&0xf | bitMarkedAll
+                       *dst = uint8(v)
+                       dst = subtract1(dst)
+                       bits >>= 4
+               }
+               // Clear the mark bits in the first two entries.
+               // They are the actual mark and checkmark bits,
+               // not non-dead markers. It simplified the code
+               // above to set the marker in every bit written and
+               // then clear these two as a special case at the end.
+               *dstStart &^= bitMarked | bitMarked<<heapBitsShift
         }
+       return totalBits
  }
  
-var unroll mutex
-
-// Unrolls GC program in typ.gc[1] into typ.gc[0]
-//go:nowritebarrier
-func unrollgcprog_m(typ *_type) {
-       lock(&unroll)
-       mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
-       if *mask == 0 {
-               pos := uintptr(8) // skip the unroll flag
-               prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
-               prog = unrollgcprog1(mask, prog, &pos, false)
-               if *prog != insEnd {
-                       throw("unrollgcprog: program does not end with insEnd")
+func dumpGCProg(p *byte) {
+       nptr := 0
+       for {
+               x := *p
+               p = add1(p)
+               if x == 0 {
+                       print("\t", nptr, " end\n")
+                       break
+               }
+               if x&0x80 == 0 {
+                       print("\t", nptr, " lit ", x, ":")
+                       n := int(x+7) / 8
+                       for i := 0; i < n; i++ {
+                               print(" ", hex(*p))
+                               p = add1(p)
+                       }
+                       print("\n")
+                       nptr += int(x)
+               } else {
+                       nbit := int(x &^ 0x80)
+                       if nbit == 0 {
+                               for nb := uint(0); ; nb += 7 {
+                                       x := *p
+                                       p = add1(p)
+                                       nbit |= int(x&0x7f) << nb
+                                       if x&0x80 == 0 {
+                                               break
+                                       }
+                               }
+                       }
+                       count := 0
+                       for nb := uint(0); ; nb += 7 {
+                               x := *p
+                               p = add1(p)
+                               count |= int(x&0x7f) << nb
+                               if x&0x80 == 0 {
+                                       break
+                               }
+                       }
+                       print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
+                       nptr += nbit * count
                 }
-               // atomic way to say mask[0] = 1
-               atomicor8(mask, 1)
         }
-       unlock(&unroll)
  }
  
  // Testing.
@@ -1192,6 +1504,19 @@ func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
         return true
  }
  
+// gcbits returns the GC type info for x, for testing.
+// The result is the bitmap entries (0 or 1), one entry per byte.
+//go:linkname reflect_gcbits reflect.gcbits
+func reflect_gcbits(x interface{}) []byte {
+       ret := getgcmask(x)
+       typ := (*ptrtype)(unsafe.Pointer((*eface)(unsafe.Pointer(&x))._type)).elem
+       nptr := typ.ptrdata / ptrSize
+       for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
+               ret = ret[:len(ret)-1]
+       }
+       return ret
+}
+
  // Returns GC type info for object p for testing.
  func getgcmask(ep interface{}) (mask []byte) {
         e := *(*eface)(unsafe.Pointer(&ep))
@@ -1243,36 +1568,43 @@ func getgcmask(ep interface{}) (mask []byte) {
         }
  
         // stack
-       var frame stkframe
-       frame.sp = uintptr(p)
-       _g_ := getg()
-       gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
-       if frame.fn != nil {
-               f := frame.fn
-               targetpc := frame.continpc
-               if targetpc == 0 {
-                       return
-               }
-               if targetpc != f.entry {
-                       targetpc--
-               }
-               pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
-               if pcdata == -1 {
-                       return
-               }
-               stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
-               if stkmap == nil || stkmap.n <= 0 {
-                       return
-               }
-               bv := stackmapdata(stkmap, pcdata)
-               size := uintptr(bv.n) * ptrSize
-               n := (*ptrtype)(unsafe.Pointer(t)).elem.size
-               mask = make([]byte, n/ptrSize)
-               for i := uintptr(0); i < n; i += ptrSize {
-                       bitmap := bv.bytedata
-                       off := (uintptr(p) + i - frame.varp + size) / ptrSize
-                       mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
+       if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
+               var frame stkframe
+               frame.sp = uintptr(p)
+               _g_ := getg()
+               gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
+               if frame.fn != nil {
+                       f := frame.fn
+                       targetpc := frame.continpc
+                       if targetpc == 0 {
+                               return
+                       }
+                       if targetpc != f.entry {
+                               targetpc--
+                       }
+                       pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+                       if pcdata == -1 {
+                               return
+                       }
+                       stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+                       if stkmap == nil || stkmap.n <= 0 {
+                               return
+                       }
+                       bv := stackmapdata(stkmap, pcdata)
+                       size := uintptr(bv.n) * ptrSize
+                       n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+                       mask = make([]byte, n/ptrSize)
+                       for i := uintptr(0); i < n; i += ptrSize {
+                               bitmap := bv.bytedata
+                               off := (uintptr(p) + i - frame.varp + size) / ptrSize
+                               mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
+                       }
                 }
+               return
         }
+
+       // otherwise, not something the GC knows about.
+       // possibly read-only data, like malloc(0).
+       // must not have pointers
         return
  }
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go

index 11e885f92886d0c780e2f16706e524616e9087da..d33fbf11dea7d11f0996e0016e886ec56586d41b 100644 (file)
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -146,8 +146,8 @@ func gcinit() {
         work.markfor = parforalloc(_MaxGcproc)
         _ = setGCPercent(readgogc())
         for datap := &firstmoduledata; datap != nil; datap = datap.next {
-               datap.gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
-               datap.gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
+               datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
+               datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
         }
         memstats.next_gc = heapminimum
  }
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go

index 9afa9542591ed7b9b43b6367c6786bd3dad2a357..687f067cb920945b515ecfe724706d487492c276 100644 (file)
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -32,6 +32,8 @@ const (
  // moduledata records information about the layout of the executable
  // image. It is written by the linker. Any changes here must be
  // matched changes to the code in cmd/internal/ld/symtab.go:symtab.
+// moduledata is stored in read-only memory; none of the pointers here
+// are visible to the garbage collector.
  type moduledata struct {
         pclntable    []byte
         ftab         []functab
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go

index 0f29608aae5a5e092287a6ccec07692072c67815..12b2a536036805f224463bd5320bc65b7873f458 100644 (file)
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -469,7 +469,7 @@ func setArgInfo(frame *stkframe, f *_func, needArgMap bool) {
                                 throw("reflect mismatch")
                         }
                         bv := (*bitvector)(unsafe.Pointer(fn[1]))
-                       frame.arglen = uintptr(bv.n / 2 * ptrSize)
+                       frame.arglen = uintptr(bv.n * ptrSize)
                         frame.argmap = bv
                 }
         }
diff --git a/src/runtime/type.go b/src/runtime/type.go

index 48df2a438243021d5323d25eff12290decc2ff75..45bdac8b91e4566ad399ea30b43d97df388f1df1 100644 (file)
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -20,17 +20,10 @@ type _type struct {
         fieldalign uint8
         kind       uint8
         alg        *typeAlg
-       // gc stores type info required for garbage collector.
-       // If (kind&KindGCProg)==0, then gc[0] points at sparse GC bitmap
-       // (no indirection), 4 bits per word.
-       // If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated
-       // read-only GC program; and gc[0] points to BSS space for sparse GC bitmap.
-       // For huge types (>maxGCMask), runtime unrolls the program directly into
-       // GC bitmap and gc[0] is not used. For moderately-sized types, runtime
-       // unrolls the program into gc[0] space on first use. The first byte of gc[0]
-       // (gc[0][0]) contains 'unroll' flag saying whether the program is already
-       // unrolled into gc[0] or not.
-       gc      [2]uintptr
+       // gcdata stores the GC type data for the garbage collector.
+       // If the KindGCProg bit is set in kind, gcdata is a GC program.
+       // Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
+       gcdata  *byte
         _string *string
         x       *uncommontype
         ptrto   *_type
author	Russ Cox <rsc@golang.org>
	Fri, 8 May 2015 05:43:18 +0000 (01:43 -0400)
committer	Russ Cox <rsc@golang.org>
	Sat, 16 May 2015 00:38:17 +0000 (00:38 +0000)
src/cmd/dist/buildtool.go		patch \| blob \| history
src/cmd/internal/gc/lex.go		patch \| blob \| history
src/cmd/internal/gc/plive.go		patch \| blob \| history
src/cmd/internal/gc/reflect.go		patch \| blob \| history
src/cmd/internal/gcprog/gcprog.go	[new file with mode: 0644]	patch \| blob
src/cmd/internal/ld/data.go		patch \| blob \| history
src/cmd/internal/ld/decodesym.go		patch \| blob \| history
src/cmd/internal/ld/lib.go		patch \| blob \| history
src/cmd/internal/ld/link.go		patch \| blob \| history
src/cmd/internal/ld/objfile.go		patch \| blob \| history
src/reflect/all_test.go		patch \| blob \| history
src/reflect/export_test.go		patch \| blob \| history
src/reflect/type.go		patch \| blob \| history
src/reflect/value.go		patch \| blob \| history
src/runtime/mbitmap.go		patch \| blob \| history
src/runtime/mgc.go		patch \| blob \| history
src/runtime/symtab.go		patch \| blob \| history
src/runtime/traceback.go		patch \| blob \| history
src/runtime/type.go		patch \| blob \| history