no way to get the data out yet.
add prototype for runtime.Callers,
missing from last CL.
R=r
CC=golang-dev
https://golang.org/cl/713041
mgc0.$O\
mheap.$O\
mheapmap$(SIZE).$O\
+ mprof.$O\
msize.$O\
print.$O\
proc.$O\
func Breakpoint()
// Caller reports file and line number information about function invocations on
-// the calling goroutine's stack. The argument is the number of stack frames to
+// the calling goroutine's stack. The argument skip is the number of stack frames to
// ascend, with 0 identifying the caller of Caller. The return values report the
// program counter, file name, and line number within the file of the corresponding
// call. The boolean ok is false if it was not possible to recover the information.
-func Caller(n int) (pc uintptr, file string, line int, ok bool)
+func Caller(skip int) (pc uintptr, file string, line int, ok bool)
+
+// Callers fills the slice pc with the program counters of function invocations
+// on the calling goroutine's stack. The argument skip is the number of stack frames
+// to skip before recording in pc, with 0 starting at the caller of Callers.
+// It returns the number of entries written to pc.
+func Callers(skip int, pc []int) int
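
A quick sketch of how the two entry points fit together, written against the signatures declared above (illustrative only, not part of this CL):

package main

import "runtime"

// whereAmI reports its own call site via Caller and counts the frames
// above it via Callers.  A skip of 0 refers to the caller of
// Caller/Callers, as documented above.
func whereAmI() {
	if _, file, line, ok := runtime.Caller(0); ok {
		println("called at", file, line)
	}
	pc := make([]int, 10)
	n := runtime.Callers(0, pc)
	println("recorded", n, "program counters")
}

func main() { whereAmI() }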
// mid returns the current os thread (m) id.
func mid() uint32
// A trailing + indicates that the tree had local modifications
// at the time of the build.
func Version() string { return defaultVersion }
+
+// MemProfileKind specifies how frequently to record
+// memory allocations in the memory profiler.
+type MemProfileKind int
+
+const (
+ MemProfileNone MemProfileKind = iota // no profiling
+ MemProfileSample // profile random sample
+ MemProfileAll // profile every allocation
+)
+
+// SetMemProfileKind controls which memory allocations
+// are recorded and reported in the memory profile.
+// Profiling an allocation has a small overhead, so the default
+// is to profile only a random sample, weighted by block size.
+func SetMemProfileKind(kind MemProfileKind)
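
As a usage sketch (hypothetical caller code; as the CL description says, there is no way to read the profile back out yet), a program would opt in to full profiling like this:

package main

import "runtime"

func main() {
	// Record every allocation instead of the default random sample.
	runtime.SetMemProfileKind(runtime.MemProfileAll)

	// ... run the workload whose allocations should be profiled ...

	// Turn profiling back off afterwards.
	runtime.SetMemProfileKind(runtime.MemProfileNone)
}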
if(wid <= sizeof(*dst))
algarray[alg].copy(wid, dst, src);
else {
- p = mal(wid);
+ p = malx(wid, 1);
algarray[alg].copy(wid, p, src);
*dst = p;
}
t = (Type*)((Eface*)typ.data-1);
if(t->kind&KindNoPointers)
- ret = mallocgc(t->size, RefNoPointers, 1, 1);
+ ret = mallocgc(t->size, RefNoPointers, 1, 1, 1);
else
ret = mal(t->size);
FLUSH(&ret);
size = n*t->size;
if(t->kind&KindNoPointers)
- ret = mallocgc(size, RefNoPointers, 1, 1);
+ ret = mallocgc(size, RefNoPointers, 1, 1, 1);
else
ret = mal(size);
FLUSH(&ret);
MHeap mheap;
MStats mstats;
+// Same algorithm as in chan.c, but a different
+// instance of the static uint32 x.
+// Not protected by a lock - let the threads use
+// the same random number if they like.
+static uint32
+fastrand1(void)
+{
+ static uint32 x = 0x49f6428aUL;
+
+ x += x;
+ if(x & 0x80000000L)
+ x ^= 0x88888eefUL;
+ return x;
+}
+
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
void*
-mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
+mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_depth)
{
int32 sizeclass;
MCache *c;
s = MHeap_Alloc(&mheap, npages, 0, 1);
if(s == nil)
throw("out of memory");
- mstats.alloc += npages<<PageShift;
- mstats.total_alloc += npages<<PageShift;
+ size = npages<<PageShift;
+ mstats.alloc += size;
+ mstats.total_alloc += size;
v = (void*)(s->start << PageShift);
// setup for mark sweep
s->gcref0 = RefNone | refflag;
+ ref = &s->gcref0;
}
m->mallocing = 0;
+ if(!(refflag & RefNoProfiling) && malloc_profile != MProf_None) {
+ switch(malloc_profile) {
+ case MProf_Sample:
+ if(m->mcache->next_sample > size) {
+ m->mcache->next_sample -= size;
+ break;
+ }
+ m->mcache->next_sample = fastrand1() & (256*1024 - 1); // sample every 128 kB allocated, on average
+ // fall through
+ case MProf_All:
+ *ref |= RefProfiled;
+ MProf_Malloc(skip_depth+1, v, size);
+ break;
+ }
+ }
+
if(dogc && mstats.heap_alloc >= mstats.next_gc)
gc(0);
return v;
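
The MProf_Sample branch above amortizes the profiling cost: next_sample is a per-thread byte budget that each allocation draws down, and only the allocation that exhausts it is recorded, after which a fresh budget is drawn as fastrand1() & (256*1024 - 1), i.e. uniform in [0, 256 kB) with a mean of roughly 128 kB, so samples are weighted by block size. A rough Go rendering of that decision (illustrative only; the helper name is invented):

// shouldSample mirrors the MProf_Sample case in mallocgc: an allocation of
// size bytes spends down the budget, and the allocation that exhausts the
// budget is sampled and draws a new budget uniform in [0, 256 kB).
func shouldSample(nextSample *uint32, size uint32, rand func() uint32) bool {
	if *nextSample > size {
		*nextSample -= size
		return false
	}
	*nextSample = rand() & (256*1024 - 1)
	return true
}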
void*
malloc(uintptr size)
{
- return mallocgc(size, 0, 0, 1);
+ return mallocgc(size, 0, 0, 1, 1);
}
// Free the object whose base pointer is v.
int32 sizeclass, size;
MSpan *s;
MCache *c;
- uint32 *ref;
+ uint32 prof, *ref;
if(v == nil)
return;
printf("free %p: not an allocated block\n", v);
throw("free mlookup");
}
+ prof = *ref & RefProfiled;
*ref = RefFree;
// Find size class for v.
sizeclass = s->sizeclass;
if(sizeclass == 0) {
// Large object.
+ if(prof)
+ MProf_Free(v, s->npages<<PageShift);
mstats.alloc -= s->npages<<PageShift;
runtime_memclr(v, s->npages<<PageShift);
MHeap_Free(&mheap, s, 1);
size = class_to_size[sizeclass];
if(size > sizeof(uintptr))
((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
+ if(prof)
+ MProf_Free(v, size);
mstats.alloc -= size;
mstats.by_size[sizeclass].nfree++;
MCache_Free(c, v, sizeclass, size);
// Runtime stubs.
void*
-mal(uint32 n)
+mal(uintptr n)
+{
+ return mallocgc(n, 0, 1, 1, 2);
+}
+
+void*
+malx(uintptr n, int32 skip_delta)
{
- return mallocgc(n, 0, 1, 1);
+ return mallocgc(n, 0, 1, 1, 2+skip_delta);
}
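
The skip_depth/skip_delta argument threads a frame count down to the profiler so the recorded stack starts at the code that asked for memory rather than inside the allocator: malloc and the Go-facing call sites pass 1, mal passes 2, and malx lets its own callers add extra wrapper depth. A loose Go analogue of the same idea, built on the Callers prototype added above (the helper names are invented for illustration):

package main

import "runtime"

// captureStack records the caller's stack, skipping the given number of
// wrapper frames plus captureStack itself, in the spirit of skip_depth
// in mallocgc.
func captureStack(wrappers int) []int {
	pc := make([]int, 32)
	n := runtime.Callers(wrappers+1, pc)
	return pc[:n]
}

// allocHelper adds one to the skip count so the recorded stack begins at
// allocHelper's caller, much as mal bumps skip_depth before calling mallocgc.
func allocHelper() []int {
	return captureStack(1)
}

func main() {
	println("frames recorded:", len(allocHelper()))
}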
// Stack allocator uses malloc/free most of the time,
unlock(&stacks);
return v;
}
- v = mallocgc(n, 0, 0, 0);
+ v = mallocgc(n, RefNoProfiling, 0, 0, 0);
if(!mlookup(v, nil, nil, nil, &ref))
throw("stackalloc mlookup");
*ref = RefStack;
MCacheList list[NumSizeClasses];
uint64 size;
int64 local_alloc; // bytes allocated (or freed) since last lock of heap
+ int32 next_sample; // trigger heap sample after allocating this many bytes
};
void* MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
MSpan* MHeap_LookupMaybe(MHeap *h, PageID p);
void MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj);
-void* mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
+void* mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed, int32 skip_depth);
int32 mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref);
void gc(int32 force);
RefFinalize, // ready to be finalized
RefNoPointers = 0x80000000U, // flag - no pointers here
RefHasFinalizer = 0x40000000U, // flag - has finalizer
+ RefProfiled = 0x20000000U, // flag - is in profiling table
+ RefNoProfiling = 0x10000000U, // flag - must not profile
+ RefFlags = 0xFFFF0000U,
};
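
The consolidated RefFlags mask is what lets the collector treat the flag bits and the mark state independently: the defined flags all live in the high bits covered by RefFlags, and the state (RefFree, RefStack, RefNone, RefSome, RefFinalize) lives below it, which is why the mgc0.c changes further down switch to ref & ~RefFlags and ref & RefFlags instead of naming individual flags. A small Go illustration of the split (constants copied from the enum above; the helper is invented):

const (
	RefNoPointers   = 0x80000000 // flag - no pointers here
	RefHasFinalizer = 0x40000000 // flag - has finalizer
	RefProfiled     = 0x20000000 // flag - is in profiling table
	RefNoProfiling  = 0x10000000 // flag - must not profile
	RefFlags        = 0xFFFF0000
)

// splitRef separates a ref word into its mark state and its flag bits,
// mirroring the ref & ~RefFlags / ref & RefFlags expressions in mgc0.c.
func splitRef(ref uint32) (state, flags uint32) {
	return ref &^ RefFlags, ref & RefFlags
}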
+
+void MProf_Malloc(int32, void*, uintptr);
+void MProf_Free(void*, uintptr);
+
+// Malloc profiling settings.
+// Must match definition in extern.go.
+enum {
+ MProf_None = 0,
+ MProf_Sample = 1,
+ MProf_All = 2,
+};
+extern int32 malloc_profile;
newtab.max *= 3;
}
- newtab.key = mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1);
- newtab.val = mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
+ newtab.key = mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1, 2);
+ newtab.val = mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1, 2);
for(i=0; i<fintab.max; i++) {
void *k;
continue;
if(mlookup(obj, &obj, &size, nil, &refp)) {
ref = *refp;
- switch(ref & ~(RefNoPointers|RefHasFinalizer)) {
+ switch(ref & ~RefFlags) {
case RefFinalize:
// If marked for finalization already, some other finalization-ready
// object has a pointer: turn off finalization until that object is gone.
case RefNone:
if(Debug > 1)
printf("%d found at %p: ", depth, &vp[i]);
- *refp = RefSome | (ref & (RefNoPointers|RefHasFinalizer));
+ *refp = RefSome | (ref & RefFlags);
if(!(ref & RefNoPointers))
scanblock(depth+1, obj, size);
break;
if(s->sizeclass == 0) {
// Large block.
ref = s->gcref0;
- if((ref&~RefNoPointers) == (RefNone|RefHasFinalizer)) {
+ if((ref&~(RefFlags^RefHasFinalizer)) == (RefNone|RefHasFinalizer)) {
// Mark as finalizable.
- s->gcref0 = RefFinalize | RefHasFinalizer | (ref&RefNoPointers);
+ s->gcref0 = RefFinalize | RefHasFinalizer | (ref&(RefFlags^RefHasFinalizer));
if(!(ref & RefNoPointers))
scanblock(100, p, s->npages<<PageShift);
}
gcrefep = s->gcref + n;
for(; gcrefp < gcrefep; gcrefp++) {
ref = *gcrefp;
- if((ref&~RefNoPointers) == (RefNone|RefHasFinalizer)) {
+ if((ref&~(RefFlags^RefHasFinalizer)) == (RefNone|RefHasFinalizer)) {
// Mark as finalizable.
- *gcrefp = RefFinalize | RefHasFinalizer | (ref&RefNoPointers);
+ *gcrefp = RefFinalize | RefHasFinalizer | (ref&(RefFlags^RefHasFinalizer));
if(!(ref & RefNoPointers))
scanblock(100, p+(gcrefp-s->gcref)*size, size);
}
if(s->sizeclass == 0) {
// Large block.
ref = s->gcref0;
- switch(ref & ~(RefNoPointers|RefHasFinalizer)) {
+ switch(ref & ~RefFlags) {
case RefNone:
// Free large object.
mstats.alloc -= s->npages<<PageShift;
runtime_memclr(p, s->npages<<PageShift);
+ if(ref & RefProfiled)
+ MProf_Free(p, s->npages<<PageShift);
s->gcref0 = RefFree;
MHeap_Free(&mheap, s, 1);
break;
}
// fall through
case RefSome:
- s->gcref0 = RefNone | (ref&(RefNoPointers|RefHasFinalizer));
+ s->gcref0 = RefNone | (ref&RefFlags);
break;
}
return;
ref = *gcrefp;
if(ref < RefNone) // RefFree or RefStack
continue;
- switch(ref & ~(RefNoPointers|RefHasFinalizer)) {
+ switch(ref & ~RefFlags) {
case RefNone:
// Free small object.
+ if(ref & RefProfiled)
+ MProf_Free(p, size);
*gcrefp = RefFree;
c = m->mcache;
if(size > sizeof(uintptr))
}
// fall through
case RefSome:
- *gcrefp = RefNone | (ref&(RefNoPointers|RefHasFinalizer));
+ *gcrefp = RefNone | (ref&RefFlags);
break;
}
}
--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc profiling.
+// Patterned after tcmalloc's algorithms; shorter code.
+
+package runtime
+#include "runtime.h"
+#include "malloc.h"
+#include "defs.h"
+#include "type.h"
+
+int32 malloc_profile = MProf_None; // no sampling during bootstrap
+
+// NOTE(rsc): Everything here could use cas if contention became an issue.
+static Lock proflock;
+
+// Per-call-stack allocation information.
+// Lookup by hashing call stack into a linked-list hash table.
+typedef struct Bucket Bucket;
+struct Bucket
+{
+ Bucket *next; // next in hash list
+ Bucket *allnext; // next in list of all buckets
+ uintptr allocs;
+ uintptr frees;
+ uintptr alloc_bytes;
+ uintptr free_bytes;
+ uintptr hash;
+ uintptr nstk;
+ uintptr stk[1];
+};
+enum {
+ BuckHashSize = 179999,
+};
+static Bucket **buckhash;
+static Bucket *buckets;
+static uintptr bucketmem;
+
+// Return the bucket for stk[0:nstk], allocating new bucket if needed.
+static Bucket*
+stkbucket(uintptr *stk, int32 nstk)
+{
+ int32 i;
+ uintptr h;
+ Bucket *b;
+
+ if(buckhash == nil)
+ buckhash = SysAlloc(BuckHashSize*sizeof buckhash[0]);
+
+ // Hash stack.
+ h = 0;
+ for(i=0; i<nstk; i++) {
+ h += stk[i];
+ h += h<<10;
+ h ^= h>>6;
+ }
+ h += h<<3;
+ h ^= h>>11;
+
+ i = h%BuckHashSize;
+ for(b = buckhash[i]; b; b=b->next)
+ if(b->hash == h && b->nstk == nstk &&
+ mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
+ return b;
+
+ b = mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1, 0);
+ bucketmem += sizeof *b + nstk*sizeof stk[0];
+ memmove(b->stk, stk, nstk*sizeof stk[0]);
+ b->hash = h;
+ b->nstk = nstk;
+ b->next = buckhash[i];
+ buckhash[i] = b;
+ b->allnext = buckets;
+ buckets = b;
+ return b;
+}
+
+// Map from pointer to Bucket* that allocated it.
+// Three levels:
+// Linked-list hash table for top N-20 bits.
+// Array index for next 13 bits.
+// Linked list for next 7 bits.
+// This is more efficient than using a general map,
+// because of the typical clustering of the pointer keys.
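
Concretely, the three levels carve up a pointer as follows: addrhash is keyed on addr>>20 (the 1 MB region), the dense array inside an AddrHash is indexed by bits 7..19, and the per-slot linked list matches on the complemented low 20 bits stored in AddrEntry.addr, which in practice resolves the remaining low 7 bits. A short Go sketch of the decomposition (illustrative only; the function below is not part of the C code):

// splitAddr shows which bits of a pointer drive each level of the lookup
// performed by setaddrbucket/getaddrbucket below.
func splitAddr(addr uintptr) (region uintptr, dense uint32, key uint32) {
	region = addr >> 20                   // hashed into addrhash via HashMultiplier
	dense = uint32(addr>>7) & (1<<13 - 1) // index into AddrHash.dense
	key = uint32(^(addr & (1<<20 - 1)))   // matched against AddrEntry.addr
	return
}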
+
+typedef struct AddrHash AddrHash;
+typedef struct AddrEntry AddrEntry;
+
+struct AddrHash
+{
+ AddrHash *next; // next in top-level hash table linked list
+ uintptr addr; // addr>>20
+ AddrEntry *dense[1<<13];
+};
+
+struct AddrEntry
+{
+ AddrEntry *next; // next in bottom-level linked list
+ uint32 addr;
+ Bucket *b;
+};
+
+enum {
+ AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
+};
+static AddrHash *addrhash[1<<AddrHashBits];
+static AddrEntry *addrfree;
+static uintptr addrmem;
+
+// Multiplicative hash function:
+// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
+// This is a good multiplier as suggested in CLR, Knuth. The hash
+// value is taken to be the top AddrHashBits bits of the bottom 32 bits
+// of the multiplied value.
+enum {
+ HashMultiplier = 2654435769U
+};
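
For the record, the arithmetic behind the constant: (sqrt(5)-1)/2 ≈ 0.6180339887, and 0.6180339887 * 2^32 ≈ 2654435769.5, whose integer part is the 2654435769 used above. A throwaway Go check (not part of the CL):

package main

import (
	"fmt"
	"math"
)

func main() {
	const HashMultiplier = 2654435769
	computed := uint32((math.Sqrt(5) - 1) / 2 * (1 << 32))
	fmt.Println(computed == HashMultiplier) // prints true
}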
+
+// Set the bucket associated with addr to b.
+static void
+setaddrbucket(uintptr addr, Bucket *b)
+{
+ int32 i;
+ uint32 h;
+ AddrHash *ah;
+ AddrEntry *e;
+
+ h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
+ for(ah=addrhash[h]; ah; ah=ah->next)
+ if(ah->addr == (addr>>20))
+ goto found;
+
+ ah = mallocgc(sizeof *ah, RefNoProfiling, 0, 1, 0);
+ addrmem += sizeof *ah;
+ ah->next = addrhash[h];
+ ah->addr = addr>>20;
+ addrhash[h] = ah;
+
+found:
+ if((e = addrfree) == nil) {
+ e = mallocgc(64*sizeof *e, RefNoProfiling, 0, 0, 0);
+ addrmem += 64*sizeof *e;
+ for(i=0; i+1<64; i++)
+ e[i].next = &e[i+1];
+ e[63].next = nil;
+ }
+ addrfree = e->next;
+ e->addr = (uint32)~(addr & ((1<<20)-1));
+ e->b = b;
+ h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
+ e->next = ah->dense[h];
+ ah->dense[h] = e;
+}
+
+// Get the bucket associated with addr and clear the association.
+static Bucket*
+getaddrbucket(uintptr addr)
+{
+ uint32 h;
+ AddrHash *ah;
+ AddrEntry *e, **l;
+ Bucket *b;
+
+ h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
+ for(ah=addrhash[h]; ah; ah=ah->next)
+ if(ah->addr == (addr>>20))
+ goto found;
+ return nil;
+
+found:
+ h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
+ for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
+ if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
+ *l = e->next;
+ b = e->b;
+ e->next = addrfree;
+ addrfree = e;
+ return b;
+ }
+ }
+ return nil;
+}
+
+// Called by malloc to record a profiled block.
+void
+MProf_Malloc(int32 skip, void *p, uintptr size)
+{
+ int32 nstk;
+ uintptr stk[32];
+ Bucket *b;
+
+ nstk = callers(1+skip, stk, 32);
+ lock(&proflock);
+ b = stkbucket(stk, nstk);
+ b->allocs++;
+ b->alloc_bytes += size;
+ setaddrbucket((uintptr)p, b);
+ unlock(&proflock);
+}
+
+// Called when freeing a profiled block.
+void
+MProf_Free(void *p, uintptr size)
+{
+ Bucket *b;
+
+ lock(&proflock);
+ b = getaddrbucket((uintptr)p);
+ if(b != nil) {
+ b->frees++;
+ b->free_bytes += size;
+ }
+ unlock(&proflock);
+}
+
+// Go interface to profile data. (Declared in extern.go)
+// Assumes Go sizeof(int) == sizeof(int32)
+
+func SetMemProfileKind(kind int32) {
+ malloc_profile = kind;
+}
+
void mcpy(byte*, byte*, uint32);
int32 mcmp(byte*, byte*, uint32);
void memmove(void*, void*, uint32);
-void* mal(uint32);
+void* mal(uintptr);
+void* malx(uintptr size, int32 skip_delta);
uint32 cmpstring(String, String);
String gostring(byte*);
String gostringw(uint16*);
ret.cap = cap;
if((t->elem->kind&KindNoPointers))
- ret.array = mallocgc(size, RefNoPointers, 1, 1);
+ ret.array = mallocgc(size, RefNoPointers, 1, 1, 1);
else
ret.array = mal(size);
if(l == 0)
return emptystring;
- s.str = mal(l+1); // leave room for NUL for C runtime (e.g., callers of getenv)
+ s.str = malx(l+1, 1); // leave room for NUL for C runtime (e.g., callers of getenv)
s.len = l;
if(l > maxstring)
maxstring = l;
}
func stringtoslicebyte(s String) (b Slice) {
- b.array = mallocgc(s.len, RefNoPointers, 1, 1);
+ b.array = mallocgc(s.len, RefNoPointers, 1, 1, 1);
b.len = s.len;
b.cap = s.len;
mcpy(b.array, s.str, s.len);
n++;
}
- b.array = mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1);
+ b.array = mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1, 1);
b.len = n;
b.cap = n;
p = s.str;