From 6b2ec065871019d07dcbe6ca527fbd4c600e1c19 Mon Sep 17 00:00:00 2001 From: Dmitriy Vyukov Date: Thu, 21 Jul 2011 13:57:13 -0400 Subject: [PATCH] runtime: faster select Make selectsend() accept pointer to the element, it makes it possible to make Scase fixed-size and allocate/free Select, all Scase's and all SudoG at once. As a consequence SudoG freelist die out. benchmark old,ns/op new,ns/op BenchmarkSelectUncontended 1080 558 BenchmarkSelectUncontended-2 675 264 BenchmarkSelectUncontended-4 459 205 BenchmarkSelectContended 1086 560 BenchmarkSelectContended-2 1775 1672 BenchmarkSelectContended-4 2668 2149 (on Intel Q6600, 4 cores, 2.4GHz) benchmark old ns/op new ns/op delta BenchmarkSelectUncontended 517.00 326.00 -36.94% BenchmarkSelectUncontended-2 281.00 166.00 -40.93% BenchmarkSelectUncontended-4 250.00 83.10 -66.76% BenchmarkSelectUncontended-8 107.00 47.40 -55.70% BenchmarkSelectUncontended-16 67.80 41.30 -39.09% BenchmarkSelectContended 513.00 325.00 -36.65% BenchmarkSelectContended-2 699.00 628.00 -10.16% BenchmarkSelectContended-4 1085.00 1092.00 +0.65% BenchmarkSelectContended-8 3253.00 2477.00 -23.85% BenchmarkSelectContended-16 5313.00 5116.00 -3.71% (on Intel E5620, 8 HT cores, 2.4 GHz) R=rsc, ken CC=golang-dev https://golang.org/cl/4811041 --- src/cmd/gc/builtin.c.boot | 2 +- src/cmd/gc/go.h | 1 + src/cmd/gc/runtime.go | 2 +- src/cmd/gc/select.c | 7 +- src/cmd/gc/subr.c | 37 ++++-- src/cmd/ld/dwarf.c | 1 - src/pkg/runtime/chan.c | 236 ++++++++++++++------------------------ 7 files changed, 124 insertions(+), 162 deletions(-) diff --git a/src/cmd/gc/builtin.c.boot b/src/cmd/gc/builtin.c.boot index 95098c8afa..6419873a28 100644 --- a/src/cmd/gc/builtin.c.boot +++ b/src/cmd/gc/builtin.c.boot @@ -76,7 +76,7 @@ char *runtimeimport = "func \"\".selectnbrecv (elem *any, hchan <-chan any) bool\n" "func \"\".selectnbrecv2 (elem *any, received *bool, hchan <-chan any) bool\n" "func \"\".newselect (size int) *uint8\n" - "func \"\".selectsend (sel *uint8, hchan chan<- any, elem any) bool\n" + "func \"\".selectsend (sel *uint8, hchan chan<- any, elem *any) bool\n" "func \"\".selectrecv (sel *uint8, hchan <-chan any, elem *any) bool\n" "func \"\".selectrecv2 (sel *uint8, hchan <-chan any, elem *any, received *bool) bool\n" "func \"\".selectdefault (sel *uint8) bool\n" diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 8ca086ee04..ff71e80a94 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1137,6 +1137,7 @@ Sym* restrictlookup(char *name, Pkg *pkg); Node* safeexpr(Node *n, NodeList **init); void saveerrors(void); Node* cheapexpr(Node *n, NodeList **init); +Node* localexpr(Node *n, NodeList **init); int32 setlineno(Node *n); void setmaxarg(Type *t); Type* shallow(Type *t); diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go index e13c95db93..7254f874e8 100644 --- a/src/cmd/gc/runtime.go +++ b/src/cmd/gc/runtime.go @@ -103,7 +103,7 @@ func selectnbrecv(elem *any, hchan <-chan any) bool func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool func newselect(size int) (sel *byte) -func selectsend(sel *byte, hchan chan<- any, elem any) (selected bool) +func selectsend(sel *byte, hchan chan<- any, elem *any) (selected bool) func selectrecv(sel *byte, hchan <-chan any, elem *any) (selected bool) func selectrecv2(sel *byte, hchan <-chan any, elem *any, received *bool) (selected bool) func selectdefault(sel *byte) (selected bool) diff --git a/src/cmd/gc/select.c b/src/cmd/gc/select.c index 91d4ebfd50..14ec015f2d 100644 --- a/src/cmd/gc/select.c +++ b/src/cmd/gc/select.c @@ -309,7 +309,12 @@ walkselect(Node *sel) fatal("select %O", n->op); case OSEND: - // selectsend(sel *byte, hchan *chan any, elem any) (selected bool); + // selectsend(sel *byte, hchan *chan any, elem *any) (selected bool); + n->left = safeexpr(n->left, &r->ninit); + n->right = localexpr(n->right, &r->ninit); + n->right = nod(OADDR, n->right, N); + n->right->etype = 1; // pointer does not escape + typecheck(&n->right, Erv); r->ntest = mkcall1(chanfn("selectsend", 2, n->left->type), types[TBOOL], &init, var, n->left, n->right); break; diff --git a/src/cmd/gc/subr.c b/src/cmd/gc/subr.c index 4253deabb2..96727b10bd 100644 --- a/src/cmd/gc/subr.c +++ b/src/cmd/gc/subr.c @@ -2750,6 +2750,20 @@ safeexpr(Node *n, NodeList **init) return cheapexpr(n, init); } +static Node* +copyexpr(Node *n, NodeList **init) +{ + Node *a, *l; + + l = nod(OXXX, N, N); + tempname(l, n->type); + a = nod(OAS, l, n); + typecheck(&a, Etop); + walkexpr(&a, init); + *init = list(*init, a); + return l; +} + /* * return side-effect free and cheap n, appending side effects to init. * result may not be assignable. @@ -2757,21 +2771,26 @@ safeexpr(Node *n, NodeList **init) Node* cheapexpr(Node *n, NodeList **init) { - Node *a, *l; - switch(n->op) { case ONAME: case OLITERAL: return n; } - l = nod(OXXX, N, N); - tempname(l, n->type); - a = nod(OAS, l, n); - typecheck(&a, Etop); - walkexpr(&a, init); - *init = list(*init, a); - return l; + return copyexpr(n, init); +} + +/* + * return n in a local variable if it is not already. + */ +Node* +localexpr(Node *n, NodeList **init) +{ + if(n->op == ONAME && + (n->class == PAUTO || n->class == PPARAM || n->class == PPARAMOUT)) + return n; + + return copyexpr(n, init); } void diff --git a/src/cmd/ld/dwarf.c b/src/cmd/ld/dwarf.c index d02fff3c26..d8ca27acea 100644 --- a/src/cmd/ld/dwarf.c +++ b/src/cmd/ld/dwarf.c @@ -1422,7 +1422,6 @@ synthesizechantypes(DWDie *die) copychildren(dwh, hchan); substitutetype(dwh, "recvq", dww); substitutetype(dwh, "sendq", dww); - substitutetype(dwh, "free", defptrto(dws)); newattr(dwh, DW_AT_byte_size, DW_CLS_CONSTANT, getattr(hchan, DW_AT_byte_size)->value, nil); diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c index 0fdf771968..7010d06d18 100644 --- a/src/pkg/runtime/chan.c +++ b/src/pkg/runtime/chan.c @@ -19,8 +19,6 @@ struct SudoG { G* g; // g and selgen constitute uint32 selgen; // a weak pointer to g - int16 offset; // offset of case number - int8 isfree; // offset of case number SudoG* link; byte* elem; // data element }; @@ -43,7 +41,6 @@ struct Hchan uint32 recvx; // receive index WaitQ recvq; // list of recv waiters WaitQ sendq; // list of send waiters - SudoG* free; // freelist Lock; }; @@ -61,33 +58,26 @@ enum struct Scase { + SudoG sg; // must be first member (cast to Scase) Hchan* chan; // chan byte* pc; // return pc uint16 kind; uint16 so; // vararg of selected bool - union { - byte elem[2*sizeof(void*)]; // element (send) - struct { - byte* elemp; // pointer to element (recv) - bool* receivedp; // pointer to received bool (recv2) - } recv; - } u; + bool* receivedp; // pointer to received bool (recv2) }; struct Select { uint16 tcase; // total count of scase[] uint16 ncase; // currently filled scase[] - Select* link; // for freelist - uint16* order; - Scase* scase[1]; // one per case + uint16* pollorder; // case poll order + Hchan** lockorder; // channel lock order + Scase scase[1]; // one per case (in order of appearance) }; -static void dequeueg(WaitQ*, Hchan*); -static SudoG* dequeue(WaitQ*, Hchan*); +static void dequeueg(WaitQ*); +static SudoG* dequeue(WaitQ*); static void enqueue(WaitQ*, SudoG*); -static SudoG* allocsg(Hchan*); -static void freesg(Hchan*, SudoG*); static void destroychan(Hchan*); Hchan* @@ -192,7 +182,7 @@ runtime·chansend(Hchan *c, byte *ep, bool *pres) if(c->dataqsiz > 0) goto asynch; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { runtime·unlock(c); @@ -257,7 +247,7 @@ asynch: c->sendx = 0; c->qcount++; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { gp = sg->g; runtime·unlock(c); @@ -297,7 +287,7 @@ runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received) if(c->closed) goto closed; - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) { runtime·unlock(c); @@ -370,7 +360,7 @@ asynch: c->recvx = 0; c->qcount--; - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) { gp = sg->g; runtime·unlock(c); @@ -619,57 +609,53 @@ newselect(int32 size, Select **selp) if(size > 1) n = size-1; - sel = runtime·mal(sizeof(*sel) + n*sizeof(sel->scase[0]) + size*sizeof(sel->order[0])); + sel = runtime·mal(sizeof(*sel) + + n*sizeof(sel->scase[0]) + + size*sizeof(sel->lockorder[0]) + + size*sizeof(sel->pollorder[0])); sel->tcase = size; sel->ncase = 0; - sel->order = (void*)(sel->scase + size); + sel->pollorder = (void*)(sel->scase + size); + sel->lockorder = (void*)(sel->pollorder + size); *selp = sel; + if(debug) runtime·printf("newselect s=%p size=%d\n", sel, size); } // cut in half to give stack a chance to split -static void selectsend(Select **selp, Hchan *c, void *pc); +static void selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so); -// selectsend(sel *byte, hchan *chan any, elem any) (selected bool); +// selectsend(sel *byte, hchan *chan any, elem *any) (selected bool); #pragma textflag 7 void -runtime·selectsend(Select *sel, Hchan *c, ...) +runtime·selectsend(Select *sel, Hchan *c, void *elem, bool selected) { // nil cases do not compete if(c == nil) return; - selectsend(&sel, c, runtime·getcallerpc(&sel)); + selectsend(sel, c, runtime·getcallerpc(&sel), elem, (byte*)&selected - (byte*)&sel); } static void -selectsend(Select **selp, Hchan *c, void *pc) +selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so) { - int32 i, eo; + int32 i; Scase *cas; - byte *ae; - Select *sel; - sel = *selp; i = sel->ncase; if(i >= sel->tcase) runtime·throw("selectsend: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas + c->elemsize - sizeof(cas->u.elem)); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = pc; cas->chan = c; - - eo = runtime·rnd(sizeof(sel), sizeof(c)); - eo = runtime·rnd(eo+sizeof(c), c->elemsize); - cas->so = runtime·rnd(eo+c->elemsize, Structrnd); + cas->so = so; cas->kind = CaseSend; - - ae = (byte*)selp + eo; - c->elemalg->copy(c->elemsize, cas->u.elem, ae); + cas->sg.elem = elem; if(debug) runtime·printf("selectsend s=%p pc=%p chan=%p so=%d\n", @@ -713,15 +699,14 @@ selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool *received, int32 so if(i >= sel->tcase) runtime·throw("selectrecv: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = pc; cas->chan = c; cas->so = so; cas->kind = CaseRecv; - cas->u.recv.elemp = elem; - cas->u.recv.receivedp = received; + cas->sg.elem = elem; + cas->receivedp = received; if(debug) runtime·printf("selectrecv s=%p pc=%p chan=%p so=%d\n", @@ -749,8 +734,7 @@ selectdefault(Select *sel, void *callerpc, int32 so) if(i >= sel->tcase) runtime·throw("selectdefault: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = callerpc; cas->chan = nil; @@ -762,26 +746,17 @@ selectdefault(Select *sel, void *callerpc, int32 so) sel, cas->pc, cas->so); } -static void -freesel(Select *sel) -{ - uint32 i; - - for(i=0; incase; i++) - runtime·free(sel->scase[i]); - runtime·free(sel); -} - static void sellock(Select *sel) { uint32 i; - Hchan *c; + Hchan *c, *c0; c = nil; for(i=0; incase; i++) { - if(sel->scase[i]->chan != c) { - c = sel->scase[i]->chan; + c0 = sel->lockorder[i]; + if(c0 && c0 != c) { + c = sel->lockorder[i]; runtime·lock(c); } } @@ -791,12 +766,13 @@ static void selunlock(Select *sel) { uint32 i; - Hchan *c; + Hchan *c, *c0; c = nil; - for(i=sel->ncase; i>0; i--) { - if(sel->scase[i-1]->chan && sel->scase[i-1]->chan != c) { - c = sel->scase[i-1]->chan; + for(i=sel->ncase; i-->0;) { + c0 = sel->lockorder[i]; + if(c0 && c0 != c) { + c = c0; runtime·unlock(c); } } @@ -851,20 +827,20 @@ selectgo(Select **selp) // generate permuted order for(i=0; incase; i++) - sel->order[i] = i; + sel->pollorder[i] = i; for(i=1; incase; i++) { - o = sel->order[i]; + o = sel->pollorder[i]; j = runtime·fastrand1()%(i+1); - sel->order[i] = sel->order[j]; - sel->order[j] = o; + sel->pollorder[i] = sel->pollorder[j]; + sel->pollorder[j] = o; } // sort the cases by Hchan address to get the locking order. - for(i=1; incase; i++) { - cas = sel->scase[i]; - for(j=i; j>0 && sel->scase[j-1]->chan >= cas->chan; j--) - sel->scase[j] = sel->scase[j-1]; - sel->scase[j] = cas; + for(i=0; incase; i++) { + c = sel->scase[i].chan; + for(j=i; j>0 && sel->lockorder[j-1] >= c; j--) + sel->lockorder[j] = sel->lockorder[j-1]; + sel->lockorder[j] = c; } sellock(sel); @@ -872,8 +848,8 @@ loop: // pass 1 - look for something already waiting dfl = nil; for(i=0; incase; i++) { - o = sel->order[i]; - cas = sel->scase[o]; + o = sel->pollorder[i]; + cas = &sel->scase[o]; c = cas->chan; switch(cas->kind) { @@ -882,7 +858,7 @@ loop: if(c->qcount > 0) goto asyncrecv; } else { - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) goto syncrecv; } @@ -897,7 +873,7 @@ loop: if(c->qcount < c->dataqsiz) goto asyncsend; } else { - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) goto syncsend; } @@ -918,20 +894,18 @@ loop: // pass 2 - enqueue on all chans for(i=0; incase; i++) { - o = sel->order[i]; - cas = sel->scase[o]; + cas = &sel->scase[i]; c = cas->chan; - sg = allocsg(c); - sg->offset = o; + sg = &cas->sg; + sg->g = g; + sg->selgen = g->selgen; switch(cas->kind) { case CaseRecv: - sg->elem = cas->u.recv.elemp; enqueue(&c->recvq, sg); break; case CaseSend: - sg->elem = cas->u.elem; enqueue(&c->sendq, sg); break; } @@ -948,50 +922,48 @@ loop: // pass 3 - dequeue from unsuccessful chans // otherwise they stack up on quiet channels for(i=0; incase; i++) { - if(sg == nil || i != sg->offset) { - cas = sel->scase[i]; + cas = &sel->scase[i]; + if(cas != (Scase*)sg) { c = cas->chan; if(cas->kind == CaseSend) - dequeueg(&c->sendq, c); + dequeueg(&c->sendq); else - dequeueg(&c->recvq, c); + dequeueg(&c->recvq); } } if(sg == nil) goto loop; - o = sg->offset; - cas = sel->scase[o]; + cas = (Scase*)sg; c = cas->chan; if(c->dataqsiz > 0) runtime·throw("selectgo: shouldnt happen"); if(debug) - runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d o=%d\n", - sel, c, cas, cas->kind, o); + runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d\n", + sel, c, cas, cas->kind); if(cas->kind == CaseRecv) { - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; + if(cas->receivedp != nil) + *cas->receivedp = true; } - freesg(c, sg); selunlock(sel); goto retc; asyncrecv: // can receive from buffer - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, chanbuf(c, c->recvx)); + if(cas->receivedp != nil) + *cas->receivedp = true; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, chanbuf(c, c->recvx)); c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil); if(++c->recvx == c->dataqsiz) c->recvx = 0; c->qcount--; - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) { gp = sg->g; selunlock(sel); @@ -1003,11 +975,11 @@ asyncrecv: asyncsend: // can send to buffer - c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->u.elem); + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->sg.elem); if(++c->sendx == c->dataqsiz) c->sendx = 0; c->qcount++; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { gp = sg->g; selunlock(sel); @@ -1022,10 +994,10 @@ syncrecv: selunlock(sel); if(debug) runtime·printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o); - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem); + if(cas->receivedp != nil) + *cas->receivedp = true; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, sg->elem); gp = sg->g; gp->param = sg; runtime·ready(gp); @@ -1034,10 +1006,10 @@ syncrecv: rclose: // read at end of closed channel selunlock(sel); - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = false; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, nil); + if(cas->receivedp != nil) + *cas->receivedp = false; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, nil); goto retc; syncsend: @@ -1045,8 +1017,7 @@ syncsend: selunlock(sel); if(debug) runtime·printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o); - if(sg->elem != nil) - c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem); + c->elemalg->copy(c->elemsize, sg->elem, cas->sg.elem); gp = sg->g; gp->param = sg; runtime·ready(gp); @@ -1055,7 +1026,7 @@ retc: // return to pc corresponding to chosen case pc = cas->pc; as = (byte*)selp + cas->so; - freesel(sel); + runtime·free(sel); *as = true; return pc; @@ -1086,7 +1057,7 @@ runtime·closechan(Hchan *c) // release all readers for(;;) { - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg == nil) break; gp = sg->g; @@ -1096,7 +1067,7 @@ runtime·closechan(Hchan *c) // release all writers for(;;) { - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg == nil) break; gp = sg->g; @@ -1140,7 +1111,7 @@ reflect·chancap(Hchan *c, int32 cap) } static SudoG* -dequeue(WaitQ *q, Hchan *c) +dequeue(WaitQ *q) { SudoG *sgp; @@ -1155,7 +1126,6 @@ loop: (sgp->selgen != sgp->g->selgen || !runtime·cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 2))) { //prints("INVALID PSEUDOG POINTER\n"); - freesg(c, sgp); goto loop; } @@ -1163,7 +1133,7 @@ loop: } static void -dequeueg(WaitQ *q, Hchan *c) +dequeueg(WaitQ *q) { SudoG **l, *sgp, *prevsgp; @@ -1171,7 +1141,6 @@ dequeueg(WaitQ *q, Hchan *c) for(l=&q->first; (sgp=*l) != nil; l=&sgp->link, prevsgp=sgp) { if(sgp->g == g) { *l = sgp->link; - freesg(c, sgp); if(q->last == sgp) q->last = prevsgp; break; @@ -1191,34 +1160,3 @@ enqueue(WaitQ *q, SudoG *sgp) q->last->link = sgp; q->last = sgp; } - -static SudoG* -allocsg(Hchan *c) -{ - SudoG* sg; - - sg = c->free; - if(sg != nil) { - c->free = sg->link; - } else - sg = runtime·mal(sizeof(*sg)); - sg->selgen = g->selgen; - sg->g = g; - sg->offset = 0; - sg->isfree = 0; - - return sg; -} - -static void -freesg(Hchan *c, SudoG *sg) -{ - if(sg != nil) { - if(sg->isfree) - runtime·throw("chan.freesg: already free"); - sg->isfree = 1; - sg->link = c->free; - sg->elem = nil; - c->free = sg; - } -} -- 2.50.0