Currently this leads to a significant skew towards 'etext' entry,
since all idle threads are profiled every tick.
Before:
Total: 66608 samples
63188 94.9% 94.9% 63188 94.9% etext
278 0.4% 95.3% 278 0.4% sweepspan
216 0.3% 95.6% 448 0.7% runtime.mallocgc
122 0.2% 95.8% 122 0.2% scanblock
113 0.2% 96.0% 113 0.2% net/textproto.canonicalMIMEHeaderKey
After:
Total: 8008 samples
3949 49.3% 49.3% 3949 49.3% etext
231 2.9% 52.2% 231 2.9% scanblock
211 2.6% 54.8% 211 2.6% runtime.cas64
182 2.3% 57.1% 408 5.1% runtime.mallocgc
178 2.2% 59.3% 178 2.2% runtime.atomicload64
LGTM=alex.brainman
R=golang-codereviews, alex.brainman
CC=golang-codereviews
https://golang.org/cl/
61250043
{
if(g != m->g0)
runtime·throw("notesleep not on g0");
- while(runtime·atomicload((uint32*)&n->key) == 0)
+ while(runtime·atomicload((uint32*)&n->key) == 0) {
+ m->blocked = true;
runtime·futexsleep((uint32*)&n->key, 0, -1);
+ m->blocked = false;
+ }
}
#pragma textflag NOSPLIT
// does not count against our nosplit stack sequence.
if(ns < 0) {
- while(runtime·atomicload((uint32*)&n->key) == 0)
+ while(runtime·atomicload((uint32*)&n->key) == 0) {
+ m->blocked = true;
runtime·futexsleep((uint32*)&n->key, 0, -1);
+ m->blocked = false;
+ }
return true;
}
deadline = runtime·nanotime() + ns;
for(;;) {
+ m->blocked = true;
runtime·futexsleep((uint32*)&n->key, 0, ns);
+ m->blocked = false;
if(runtime·atomicload((uint32*)&n->key) != 0)
break;
now = runtime·nanotime();
return;
}
// Queued. Sleep.
+ m->blocked = true;
runtime·semasleep(-1);
+ m->blocked = false;
}
#pragma textflag NOSPLIT
if(ns < 0) {
// Queued. Sleep.
+ m->blocked = true;
runtime·semasleep(-1);
+ m->blocked = false;
return true;
}
deadline = runtime·nanotime() + ns;
for(;;) {
// Registered. Sleep.
+ m->blocked = true;
if(runtime·semasleep(ns) >= 0) {
+ m->blocked = false;
// Acquired semaphore, semawakeup unregistered us.
// Done.
return true;
}
+ m->blocked = false;
// Interrupted or timed out. Still registered. Semaphore not acquired.
ns = deadline - runtime·nanotime();
} else if(mp == (M*)LOCKED) {
// Wakeup happened so semaphore is available.
// Grab it to avoid getting out of sync.
+ m->blocked = true;
if(runtime·semasleep(-1) < 0)
runtime·throw("runtime: unable to acquire - semaphore out of sync");
+ m->blocked = false;
return true;
} else
runtime·throw("runtime: unexpected waitm - semaphore out of sync");
allm = runtime·atomicloadp(&runtime·allm);
for(mp = allm; mp != nil; mp = mp->alllink) {
thread = runtime·atomicloadp(&mp->thread);
- if(thread == nil)
+ // Do not profile threads blocked on Notes,
+ // this includes idle worker threads,
+ // idle timer thread, idle heap scavenger, etc.
+ if(thread == nil || mp->profilehz == 0 || mp->blocked)
continue;
runtime·stdcall(runtime·SuspendThread, 1, thread);
- if(mp->profilehz != 0)
+ if(mp->profilehz != 0 && !mp->blocked)
profilem(mp);
runtime·stdcall(runtime·ResumeThread, 1, thread);
}
if(lockedg)
id3 = lockedg->goid;
runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
- " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
+ " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
mp->id, id1, id2,
mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
- mp->spinning, id3);
+ mp->spinning, m->blocked, id3);
}
runtime·lock(&allglock);
for(gi = 0; gi < runtime·allglen; gi++) {
int32 dying;
int32 profilehz;
int32 helpgc;
- bool spinning;
+ bool spinning; // M is out of work and is actively looking for work
+ bool blocked; // M is blocked on a Note
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
int32 ncgo; // number of cgo calls currently in progress