extern volatile intgo runtime·MemProfileRate;
+static void* largealloc(uint32, uintptr*);
+
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
{
int32 sizeclass;
+ uintptr tinysize, size1;
intgo rate;
MCache *c;
MCacheList *l;
- uintptr npages;
- MSpan *s;
MLink *v;
+ byte *tiny;
+ P *p;
if(size == 0) {
// All 0-length allocations use this pointer.
c = m->mcache;
if(!runtime·debug.efence && size <= MaxSmallSize) {
+ if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) {
+ // Tiny allocator.
+ //
+ // Tiny allocator combines several tiny allocation requests
+ // into a single memory block. The resulting memory block
+ // is freed when all subobjects are unreachable. The subobjects
+ // must be FlagNoScan (have no pointers); this ensures that
+ // the amount of potentially wasted memory is bounded.
+ //
+ // The size of the memory block used for combining (TinySize) is tunable.
+ // The current setting is 16 bytes, which gives at most 2x worst-case
+ // memory wastage (when all but one of the subobjects are unreachable).
+ // 8 bytes would result in no wastage at all, but would provide fewer
+ // opportunities for combining.
+ // 32 bytes provides more opportunities for combining,
+ // but can lead to 4x worst-case wastage.
+ // The best-case saving is 8x regardless of block size.
+ //
+ // Objects obtained from the tiny allocator must not be freed explicitly.
+ // So when an object is to be freed explicitly, we ensure that
+ // its size >= TinySize.
+ //
+ // SetFinalizer has a special case for objects potentially coming
+ // from the tiny allocator; in that case it allows setting finalizers
+ // for an inner byte of a memory block.
+ //
+ // The main targets of the tiny allocator are small strings and
+ // standalone escaping variables. On a json benchmark
+ // the allocator reduces the number of allocations by ~12% and
+ // the heap size by ~20%.
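+ //
+ // A worked example of the packing: three 5-byte FlagNoScan
+ // allocations fit into one 16-byte block at offsets 0, 5 and 10
+ // (odd sizes need no alignment rounding), wasting only 1 byte.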
+
+ p = m->p;
+ tinysize = p->tinysize;
+ if(size <= tinysize) {
+ tiny = p->tiny;
+ // Align tiny pointer for required (conservative) alignment.
+ if((size&7) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 8);
+ else if((size&3) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 4);
+ else if((size&1) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 2);
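+ // size1 is the requested size plus any padding that the
+ // alignment rounding above just introduced.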
+ size1 = size + (tiny - p->tiny);
+ if(size1 <= tinysize) {
+ // The object fits into existing tiny block.
+ v = (MLink*)tiny;
+ p->tiny += size1;
+ p->tinysize -= size1;
+ m->mallocing = 0;
+ m->locks--;
+ if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+ return v;
+ }
+ }
+ // Allocate a new TinySize block.
+ l = &c->list[TinySizeClass];
+ if(l->list == nil)
+ runtime·MCache_Refill(c, TinySizeClass);
+ v = l->list;
+ l->list = v->next;
+ l->nlist--;
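+ // Zero the whole 16-byte block up front: the first word still
+ // holds the freelist link, and subobjects carved from the block
+ // are not zeroed individually.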
+ ((uint64*)v)[0] = 0;
+ ((uint64*)v)[1] = 0;
+ // See if we need to replace the existing tiny block with the new one
+ // based on amount of remaining free space.
+ if(TinySize-size > tinysize) {
+ p->tiny = (byte*)v + size;
+ p->tinysize = TinySize - size;
+ }
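+ // The new object itself is carved from the start of the new block;
+ // the full TinySize is accounted to the cache below.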
+ size = TinySize;
+ goto done;
+ }
// Allocate from mcache free lists.
// Inlined version of SizeToClass().
if(size <= 1024-8)
if(size > 2*sizeof(uintptr) && ((uintptr*)v)[1] != 0)
runtime·memclr((byte*)v, size);
}
+ done:
c->local_cachealloc += size;
} else {
- // TODO(rsc): Report tracebacks for very large allocations.
// Allocate directly from heap.
- npages = size >> PageShift;
- if((size & PageMask) != 0)
- npages++;
- s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
- if(s == nil)
- runtime·throw("out of memory");
- s->limit = (byte*)(s->start<<PageShift) + size;
- size = npages<<PageShift;
- v = (void*)(s->start << PageShift);
-
- // setup for mark sweep
- runtime·markspan(v, 0, 0, true);
+ v = largealloc(flag, &size);
}
if(flag & FlagNoGC)
return v;
}
+static void*
+largealloc(uint32 flag, uintptr *sizep)
+{
+ uintptr npages, size;
+ MSpan *s;
+ void *v;
+
+ // Allocate directly from heap.
+ size = *sizep;
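+ // Round the allocation up to a whole number of pages.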
+ npages = size >> PageShift;
+ if((size & PageMask) != 0)
+ npages++;
+ s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
+ if(s == nil)
+ runtime·throw("out of memory");
+ s->limit = (byte*)(s->start<<PageShift) + size;
+ *sizep = npages<<PageShift;
+ v = (void*)(s->start << PageShift);
+ // setup for mark sweep
+ runtime·markspan(v, 0, 0, true);
+ return v;
+}
+
void*
runtime·malloc(uintptr size)
{
}
size = s->elemsize;
sizeclass = s->sizeclass;
+ // Objects that are smaller than TinySize can be allocated using the tiny allocator;
+ // if such an object is then combined with an object that has a finalizer, we will crash.
+ if(size < TinySize)
+ runtime·throw("freeing too small block");
if(raceenabled)
runtime·racefree(v);
runtime·InitSizes();
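+ // The tiny allocator hands out whole TinySizeClass blocks, so the size table must agree.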
+ if(runtime·class_to_size[TinySizeClass] != TinySize)
+ runtime·throw("bad TinySizeClass");
+
// limit = runtime·memlimit();
// See https://code.google.com/p/go/issues/detail?id=5049
// TODO(rsc): Fix after 1.1.
m->mcache = runtime·allocmcache();
// See if it works.
- runtime·free(runtime·malloc(1));
+ runtime·free(runtime·malloc(TinySize));
}
void*
goto throw;
}
ot = (PtrType*)obj.type;
- if(ot->elem != nil && ot->elem->size == 0) {
+ // As an implementation detail we do not run finalizers for zero-sized objects,
+ // because we use &runtime·zerobase for all such allocations.
+ if(ot->elem != nil && ot->elem->size == 0)
return;
- }
if(!runtime·mlookup(obj.data, &base, &size, nil) || obj.data != base) {
- runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
- goto throw;
+ // As an implementation detail we allow setting finalizers for an inner byte
+ // of an object if it could come from the tiny allocator (see mallocgc for details).
+ if(ot->elem == nil || (ot->elem->kind&KindNoPointers) == 0 || ot->elem->size >= TinySize) {
+ runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
+ goto throw;
+ }
}
if(finalizer.type != nil) {
if(finalizer.type->kind != KindFunc)
}
pools.head = nil;
- // clear defer pools
for(pp=runtime·allp; p=*pp; pp++) {
+ // clear tinyalloc pool
+ p->tiny = nil;
+ p->tinysize = 0;
+ // clear defer pools
for(i=0; i<nelem(p->deferpool); i++)
p->deferpool[i] = nil;
}
MSpan **allspans, *s;
uint32 spanidx;
G *gp;
+ void *p;
USED(&desc);
wbuf = getempty(nil);
// don't mark finalized object, but scan it so we
// retain everything it points to.
spf = (SpecialFinalizer*)sp;
- enqueue1(&wbuf, (Obj){(void*)((s->start << PageShift) + spf->offset), s->elemsize, 0});
+ // A finalizer can be set for an inner byte of an object; find the object's beginning.
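+ // (spf->offset/s->elemsize*s->elemsize rounds the offset down to the object's start.)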
+ p = (void*)((s->start << PageShift) + spf->offset/s->elemsize*s->elemsize);
+ enqueue1(&wbuf, (Obj){p, s->elemsize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->fint, PtrSize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
specialp = &s->specials;
special = *specialp;
while(special != nil) {
- p = (byte*)(s->start << PageShift) + special->offset;
+ // A finalizer can be set for an inner byte of an object; find the object's beginning.
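+ // (special->offset/size*size rounds the offset down to the object's start.)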
+ p = (byte*)(s->start << PageShift) + special->offset/size*size;
off = (uintptr*)p - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *bitp>>shift;
if((bits & (bitAllocated|bitMarked)) == bitAllocated) {
+ // Find the exact byte for which the special was set up
+ // (as opposed to the object's beginning).
+ p = (byte*)(s->start << PageShift) + special->offset;
// about to free object: splice out special record
y = special;
special = special->next;
--- /dev/null
+// run
+
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test finalizers work for tiny (combined) allocations.
+
+package main
+
+import (
+ "runtime"
+ "sync/atomic"
+ "time"
+)
+
+func main() {
+ // Does not work on 32-bit systems due to the partially conservative GC.
+ // Try to enable this when we have a fully precise GC.
+ if runtime.GOARCH != "amd64" {
+ return
+ }
+ // Likewise for gccgo.
+ if runtime.Compiler == "gccgo" {
+ return
+ }
+ N := int32(100)
+ count := N
+ done := make([]bool, N)
+ for i := int32(0); i < N; i++ {
+ x := i // subject to tiny alloc
+ // the closure must be big enough to be combined
+ runtime.SetFinalizer(&x, func(p *int32) {
+ // Check that p points to the correct subobject of the tiny allocation.
+ // It's a bit tricky, because we can't capture another variable
+ // with the expected value (it would be combined as well).
+ if *p < 0 || *p >= N {
+ println("got", *p)
+ panic("corrupted")
+ }
+ if done[*p] {
+ println("got", *p)
+ panic("already finalized")
+ }
+ done[*p] = true
+ atomic.AddInt32(&count, -1)
+ })
+ }
+ for i := 0; i < 4; i++ {
+ runtime.GC()
+ time.Sleep(10 * time.Millisecond)
+ }
+ // Some of the finalizers may not be executed
+ // if the outermost allocations are combined with something persistent.
+ // Currently 4 int32's are combined into a 16-byte block;
+ // ensure that most of them are finalized.
+ if count >= N/4 {
+ println(count, "out of", N, "finalizers were not called")
+ panic("not all finalizers were called")
+ }
+}
+