The deadlock occurs sporadically in misc/cgo/test/TestCthread.
The problem is that starttheworld() leaves some P's with local work
without M's. Then all active M's enter syscalls, but refuse to
wake other M's because of the following check (in both entersyscallblock() and retake()):
if(p->runqhead == p->runqtail &&
runtime·atomicload(&runtime·sched.nmspinning) +
runtime·atomicload(&runtime·sched.npidle) > 0)
continue;
R=rsc
CC=golang-dev
https://golang.org/cl/7424054
void
runtime·starttheworld(void)
{
- P *p;
+ P *p, *p1;
M *mp;
bool add;
procresize(runtime·gomaxprocs);
runtime·gcwaiting = 0;
+ p1 = nil;
while(p = pidleget()) {
// procresize() puts p's with work at the beginning of the list.
// Once we reach a p without a run queue, the rest don't have one either.
}
mp = mget();
if(mp == nil) {
- pidleput(p);
- break;
+ p->link = p1;
+ p1 = p;
+ continue;
}
if(mp->nextp)
runtime·throw("starttheworld: inconsistent mp->nextp");
}
runtime·unlock(&runtime·sched);
+ while(p1) {
+ p = p1;
+ p1 = p1->link;
+ add = false;
+ newm(nil, p);
+ }
+
if(add) {
// If GC could have used another helper proc, start one now,
// in the hope that it will be available next time.