Cypherpunks repositories - gostls13.git/commitdiff
runtime: faster entersyscall/exitsyscall
author Russ Cox <rsc@golang.org>
Sat, 23 Jul 2011 16:22:55 +0000 (12:22 -0400)
committer Russ Cox <rsc@golang.org>
Sat, 23 Jul 2011 16:22:55 +0000 (12:22 -0400)
Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.

When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.
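
As a rough illustration of the shape of the change, here is a minimal sketch using portable C11 atomics rather than the runtime's own runtime·cas/runtime·xadd helpers; the function names and the bail-out conditions are placeholders, not the scheduler's real checks.

#include <stdatomic.h>
#include <stdint.h>

/* Old shape: a compare-and-swap retry loop.  Under contention the CAS
 * can fail and loop repeatedly before the decrement lands. */
static int dec_with_cas(atomic_uint_fast32_t *word)
{
        uint_fast32_t v, w;
        for(;;) {
                v = atomic_load(word);
                if(v == 0)
                        return 0;       /* placeholder: bail to slow path */
                w = v - 1;
                if(atomic_compare_exchange_weak(word, &v, w))
                        return 1;       /* fast path taken */
        }
}

/* New shape: one fetch-and-add always completes in a single step; the
 * fast-path conditions are then checked on the value it produced. */
static int dec_with_xadd(atomic_uint_fast32_t *word)
{
        uint_fast32_t v = atomic_fetch_sub(word, 1) - 1;
        return v != (uint_fast32_t)-1;  /* placeholder check on the new value */
}

The same pattern, with the add in the other direction, is what the exitsyscall fast path in the diff below uses.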

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               32           26  -17.08%
runtime_test.BenchmarkSyscall-2            155           59  -61.81%
runtime_test.BenchmarkSyscall-3            112           52  -52.95%
runtime_test.BenchmarkSyscall-4             94           48  -48.57%
runtime_test.BenchmarkSyscallWork          871          872   +0.11%
runtime_test.BenchmarkSyscallWork-2        481          477   -0.83%
runtime_test.BenchmarkSyscallWork-3        338          335   -0.89%
runtime_test.BenchmarkSyscallWork-4        263          256   -2.66%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047

src/pkg/runtime/proc.c
src/pkg/runtime/proc.p

diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 6416651ce564c3c1e8ee7fa0877d6fc28cdcfc3d..13bc52bb68249eeb6599f3645993c91118f1d4ef 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -773,7 +773,7 @@ runtime·gosched(void)
 void
 runtime·entersyscall(void)
 {
-       uint32 v, w;
+       uint32 v;
 
        if(runtime·sched.predawn)
                return;
@@ -796,24 +796,14 @@ runtime·entersyscall(void)
        //      mcpu--
        //      gwait not true
        //      waitstop && mcpu <= mcpumax not true
-       // If we can do the same with a single atomic read/write,
+       // If we can do the same with a single atomic add,
        // then we can skip the locks.
-       for(;;) {
-               v = runtime·sched.atomic;
-               if(atomic_gwaiting(v))
-                       break;
-               if(atomic_waitstop(v) && atomic_mcpu(v)-1 <= atomic_mcpumax(v))
-                       break;
-               w = v;
-               w += (-1<<mcpuShift);
-               if(runtime·cas(&runtime·sched.atomic, v, w))
-                       return;
-       }
+       v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
+       if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
+               return;
 
        schedlock();
-
-       // atomic { mcpu--; }
-       v = runtime·xadd(&runtime·sched.atomic, (-1<<mcpuShift));
+       v = runtime·atomicload(&runtime·sched.atomic);
        if(atomic_gwaiting(v)) {
                matchmg();
                v = runtime·atomicload(&runtime·sched.atomic);
@@ -837,43 +827,28 @@ runtime·entersyscall(void)
 void
 runtime·exitsyscall(void)
 {
-       uint32 v, w;
+       uint32 v;
 
        if(runtime·sched.predawn)
                return;
 
        // Fast path.
-       // If we can do the mcpu-- bookkeeping and
+       // If we can do the mcpu++ bookkeeping and
        // find that we still have mcpu <= mcpumax, then we can
        // start executing Go code immediately, without having to
        // schedlock/schedunlock.
-       for(;;) {
-               // If the profiler frequency needs updating,
-               // take the slow path.
-               if(m->profilehz != runtime·sched.profilehz)
-                       break;
-
-               v = runtime·sched.atomic;
-               if(atomic_mcpu(v) >= atomic_mcpumax(v))
-                       break;
-
-               w = v;
-               w += (1<<mcpuShift);
-               if(runtime·cas(&runtime·sched.atomic, v, w)) {
-                       // There's a cpu for us, so we can run.
-                       g->status = Grunning;
-                       // Garbage collector isn't running (since we are),
-                       // so okay to clear gcstack.
-                       g->gcstack = nil;
-                       return;
-               }
+       v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift));
+       if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+               // There's a cpu for us, so we can run.
+               g->status = Grunning;
+               // Garbage collector isn't running (since we are),
+               // so okay to clear gcstack.
+               g->gcstack = nil;
+               return;
        }
 
        schedlock();
 
-       // atomic { mcpu++; }
-       runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift));
-
        // Tell scheduler to put g back on the run queue:
        // mostly equivalent to g->status = Grunning,
        // but keeps the garbage collector from thinking
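
The deltas passed to runtime·xadd above rely on sched.atomic packing several scheduler fields into one 32-bit word, so that adding -1<<mcpuShift (or 1<<mcpuShift) adjusts only the mcpu field. The sketch below shows that packing; the widths and bit positions are assumptions modeled on proc.c of this era and are not part of this diff, so treat the exact values as illustrative.

/* Illustrative packing of runtime·sched.atomic (assumed layout). */
enum {
        mcpuWidth = 15,
        mcpuMask = (1<<mcpuWidth) - 1,
        mcpuShift = 0,                            /* [15 bits] m's running Go code */
        mcpumaxShift = mcpuShift + mcpuWidth,     /* [15 bits] limit on mcpu */
        waitstopShift = mcpumaxShift + mcpuWidth, /* [1 bit] gc waiting for stop */
        gwaitingShift = waitstopShift + 1,        /* [1 bit] goroutines waiting for an m */
};

#define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
#define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
#define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
#define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)

Adding -1<<mcpuShift decrements the mcpu field in place: as long as mcpu is nonzero, the subtraction cannot borrow into the neighboring fields, which is why a plain atomic add can stand in for the cas loop.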
diff --git a/src/pkg/runtime/proc.p b/src/pkg/runtime/proc.p
index 337b07877301ed2ac9a5a3b2a015b87fdd6c8204..f0b46de611408c5d60414fd7d91013d42efe1713 100644
--- a/src/pkg/runtime/proc.p
+++ b/src/pkg/runtime/proc.p
@@ -3,8 +3,9 @@
 // license that can be found in the LICENSE file.
 
 /*
-model for proc.c as of 2011/07/15.
-takes 4300 seconds to explore 1128130 states
+model for proc.c as of 2011/07/22.
+
+takes 4900 seconds to explore 1189070 states
 with G=3, var_gomaxprocs=1
 on a Core i7 L640 2.13 GHz Lenovo X201s.
 
@@ -329,33 +330,53 @@ inline schedule() {
        nextgandunlock()
 }
 
+/*
+ * schedpend is > 0 if a goroutine is committed to
+ * entering the scheduler but has not yet done so.
+ * Just as we don't test for the undesirable conditions when a
+ * goroutine is in the scheduler, we don't test for them when
+ * a goroutine will be in the scheduler shortly.
+ * Modeling this state lets us replace mcpu cas loops with
+ * simpler mcpu atomic adds.
+ */
+byte schedpend;
+
 /*
  * entersyscall is like the C function.
  */
 inline entersyscall() {
+       bit willsched;
+
        /*
         * Fast path.  Check all the conditions tested during schedlock/schedunlock
         * below, and if we can get through the whole thing without stopping, run it
         * in one atomic cas-based step.
         */
        atomic {
+               atomic_mcpu--;
                if
                :: atomic_gwaiting ->
                        skip
-               :: atomic_waitstop && atomic_mcpu-1 <= atomic_mcpumax ->
+               :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
                        skip
                :: else ->
-                       atomic_mcpu--;
                        goto Lreturn_entersyscall;
-               fi
+               fi;
+               willsched = 1;
+               schedpend++;
        }
 
        /*
         * Normal path.
         */
        schedlock()
-       d_step {
-               atomic_mcpu--;
+       opt_dstep {
+               if
+               :: willsched ->
+                       schedpend--;
+                       willsched = 0
+               :: else
+               fi
        }
        if
        :: atomic_gwaiting ->
@@ -382,11 +403,11 @@ inline exitsyscall() {
         */
        atomic {
                // omitted profilehz check
+               atomic_mcpu++;
                if
                :: atomic_mcpu >= atomic_mcpumax ->
                        skip
                :: else ->
-                       atomic_mcpu++;
                        goto Lreturn_exitsyscall
                fi
        }
@@ -396,7 +417,6 @@ inline exitsyscall() {
         */
        schedlock();
        d_step {
-               atomic_mcpu++;
                if
                :: atomic_mcpu <= atomic_mcpumax ->
                        skip
@@ -497,10 +517,10 @@ active proctype monitor() {
 
        do
        // Should never have goroutines waiting with procs available.
-       :: !sched_lock && gwait > 0 && atomic_mcpu < atomic_mcpumax ->
+       :: !sched_lock && schedpend==0 && gwait > 0 && atomic_mcpu < atomic_mcpumax ->
                assert 0
        // Should never have gc waiting for stop if things have already stopped.
-       :: !sched_lock && atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
+       :: !sched_lock && schedpend==0 && atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
                assert 0
        od
 }