Finer-grained transfers were relevant with per-M caches,
with per-P caches they are not relevant and harmful for performance.
For few small size classes where it makes difference,
it's fine to grab the whole span (4K).
benchmark old ns/op new ns/op delta
BenchmarkMalloc 42 40 -4.45%
R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/
9374043
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i
-// class_to_transfercount[i] = number of objects to move when
-// taking a bunch of objects out of the central lists
-// and putting them in the thread free list.
int32 runtime·SizeToClass(int32);
extern int32 runtime·class_to_size[NumSizeClasses];
extern int32 runtime·class_to_allocnpages[NumSizeClasses];
-extern int32 runtime·class_to_transfercount[NumSizeClasses];
extern int8 runtime·size_to_class8[1024/8 + 1];
extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
extern void runtime·InitSizes(void);
};
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
-int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first);
+int32 runtime·MCentral_AllocList(MCentral *c, MLink **first);
void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first);
void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
l = &c->list[sizeclass];
if(l->list == nil) {
// Replenish using central lists.
- n = runtime·MCentral_AllocList(&runtime·mheap->central[sizeclass],
- runtime·class_to_transfercount[sizeclass], &first);
+ n = runtime·MCentral_AllocList(&runtime·mheap->central[sizeclass], &first);
if(n == 0)
runtime·throw("out of memory");
l->list = first;
if(l->nlist >= MaxMCacheListLen) {
// Release a chunk back.
- ReleaseN(c, l, runtime·class_to_transfercount[sizeclass], sizeclass);
+ ReleaseN(c, l, l->nlist/2, sizeclass);
}
if(c->size >= MaxMCacheSize) {
runtime·MSpanList_Init(&c->empty);
}
-// Allocate up to n objects from the central free list.
+// Allocate a list of objects from the central free list.
// Return the number of objects allocated.
// The objects are linked together by their first words.
// On return, *pstart points at the first object.
int32
-runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst)
+runtime·MCentral_AllocList(MCentral *c, MLink **pfirst)
{
MSpan *s;
- MLink *first, *last;
- int32 cap, avail, i;
+ int32 cap, n;
runtime·lock(c);
// Replenish central list if empty.
}
s = c->nonempty.next;
cap = (s->npages << PageShift) / s->elemsize;
- avail = cap - s->ref;
- if(avail < n)
- n = avail;
-
- // First one is guaranteed to work, because we just grew the list.
- first = s->freelist;
- last = first;
- for(i=1; i<n; i++) {
- last = last->next;
- }
- s->freelist = last->next;
- last->next = nil;
+ n = cap - s->ref;
+ *pfirst = s->freelist;
+ s->freelist = nil;
s->ref += n;
c->nfree -= n;
-
- if(n == avail) {
- if(s->freelist != nil || s->ref != cap) {
- runtime·throw("invalid freelist");
- }
- runtime·MSpanList_Remove(s);
- runtime·MSpanList_Insert(&c->empty, s);
- }
-
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_Insert(&c->empty, s);
runtime·unlock(c);
- *pfirst = first;
return n;
}
int32 runtime·class_to_size[NumSizeClasses];
int32 runtime·class_to_allocnpages[NumSizeClasses];
-int32 runtime·class_to_transfercount[NumSizeClasses];
// The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
// Copy out for statistics table.
for(i=0; i<nelem(runtime·class_to_size); i++)
mstats.by_size[i].size = runtime·class_to_size[i];
-
- // Initialize the runtime·class_to_transfercount table.
- for(sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
- n = 64*1024 / runtime·class_to_size[sizeclass];
- if(n < 2)
- n = 2;
- if(n > 32)
- n = 32;
- runtime·class_to_transfercount[sizeclass] = n;
- }
return;
dump: