onM(stoptheworld)
        onM(finishsweep_m) // finish sweep before we start concurrent scan.
-       onM(starttheworld)
-
-       // Do a concurrent heap scan before we stop the world.
-       onM(gcscan_m)
-       onM(gcinstallmarkwb_m)
-       onM(stoptheworld)
-       //      onM(starttheworld)
-       // mark from roots scanned in gcscan_m. startthework when write barrier works
-       onM(gcmark_m)
-       //      onM(stoptheworld)
+       if false {         // To turn on concurrent scan and mark set to true...
+               onM(starttheworld)
+               // Do a concurrent heap scan before we stop the world.
+               onM(gcscan_m)
+               onM(stoptheworld)
+               onM(gcinstallmarkwb_m)
+               onM(starttheworld)
+               onM(gcmark_m)
+               onM(stoptheworld)
+               onM(gcinstalloffwb_m)
+       }
        if mp != acquirem() {
                gothrow("gogc: rescheduled")
        }
 
 //
 // Shade indicates that it has seen a white pointer by adding the referent
 // to wbuf.
+// slot is the destination (dst) in go code
+// ptr is the value that goes into the slot (src) in the go code
 void
-runtime·markwb(void **slot, void *ptr)
+runtime·gcmarkwb_m()
 {
-       // initial nil check avoids some needlesss loads
-       if(ptr != nil && inheap(ptr) && shaded((void*)slot))
-               shade(ptr);
+       byte **slot, *ptr;
+       slot = (byte**)g->m->scalararg[0];
+       ptr = (byte*)g->m->scalararg[1];
+
        *slot = ptr;
+       switch(runtime·gcphase) {
+       default:
+               runtime·throw("gcphasework in bad gcphase");
+       case GCoff:
+       case GCquiesce:
+       case GCstw:
+       case GCsweep:
+       case GCscan:
+               break;
+       case GCmark:
+               if(ptr != nil && inheap(ptr) && shaded((byte*)slot))
+                       shade(ptr);
+               break;
+       case GCmarktermination:
+               if(ptr != nil && inheap(ptr) && shaded((byte*)slot))
+                       shade(ptr);
+               break;
+       }
 }
 
 // The gp has been moved to a GC safepoint. GC phase specific
        scanblock(nil, 0, nil);
 }
 
-// For now this must be followed by a stoptheworld and a starttheworld to ensure
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
 // all go routines see the new barrier.
 void
 runtime·gcinstallmarkwb_m(void)
        runtime·gcphase = GCmark;
 }
 
+// Switch the GC phase to GCoff. For now the call must be bracketed by stoptheworld
+// and starttheworld so that every goroutine observes the new write barrier state.
+void
+runtime·gcinstalloffwb_m(void)
+{
+       runtime·gcphase = GCoff;
+}
+
 static void
 gc(struct gc_args *args)
 {
 
        if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) {
                onM(func() { gothrow("bad pointer in write barrier") })
        }
-       *dst = src
+
+       mp := acquirem()
+       if mp.inwb {
+               *dst = src
+               releasem(mp)
+               return
+       }
+       mp.inwb = true
+       oldscalar0 := mp.scalararg[0]
+       oldscalar1 := mp.scalararg[1]
+       mp.scalararg[0] = uintptr(unsafe.Pointer(dst))
+       mp.scalararg[1] = src
+       onM_signalok(gcmarkwb_m)
+       mp.scalararg[0] = oldscalar0
+       mp.scalararg[1] = oldscalar1
+       mp.inwb = false
+       releasem(mp)
+       //      *dst = src is done inside of the write barrier.
 }
 
 //go:nosplit
 
        int32   helpgc;
        bool    spinning;       // M is out of work and is actively looking for work
        bool    blocked;        // M is blocked on a Note
+       bool    inwb;           // M is executing a write barrier
        int8    printlock;
        uint32  fastrand;
        uint64  ncgocall;       // number of cgo calls in total