// trace is the global tracing context.
 var trace struct {
+       // trace.lock must only be acquired on the system stack where
+       // stack splits cannot happen while it is held.
        lock          mutex       // protects the following members
        lockOwner     *g          // to avoid deadlocks during recursive acquisition of lock
        enabled       bool        // when set runtime traces events
                raceacquire(unsafe.Pointer(&trace.shutdownSema))
        }
 
-       // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
-       lock(&trace.lock)
-       for _, p := range allp[:cap(allp)] {
-               if p.tracebuf != 0 {
-                       throw("trace: non-empty trace buffer in proc")
+       systemstack(func() {
+               // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
+               lock(&trace.lock)
+               for _, p := range allp[:cap(allp)] {
+                       if p.tracebuf != 0 {
+                               throw("trace: non-empty trace buffer in proc")
+                       }
                }
-       }
-       if trace.buf != 0 {
-               throw("trace: non-empty global trace buffer")
-       }
-       if trace.fullHead != 0 || trace.fullTail != 0 {
-               throw("trace: non-empty full trace buffer")
-       }
-       if trace.reading != 0 || trace.reader.Load() != nil {
-               throw("trace: reading after shutdown")
-       }
-       for trace.empty != 0 {
-               buf := trace.empty
-               trace.empty = buf.ptr().link
-               sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
-       }
-       trace.strings = nil
-       trace.shutdown = false
-       trace.cpuLogRead = nil
-       unlock(&trace.lock)
+               if trace.buf != 0 {
+                       throw("trace: non-empty global trace buffer")
+               }
+               if trace.fullHead != 0 || trace.fullTail != 0 {
+                       throw("trace: non-empty full trace buffer")
+               }
+               if trace.reading != 0 || trace.reader.Load() != nil {
+                       throw("trace: reading after shutdown")
+               }
+               for trace.empty != 0 {
+                       buf := trace.empty
+                       trace.empty = buf.ptr().link
+                       sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
+               }
+               trace.strings = nil
+               trace.shutdown = false
+               trace.cpuLogRead = nil
+               unlock(&trace.lock)
+       })
 }
 
 // ReadTrace returns the next chunk of binary tracing data, blocking until data
 // ReadTrace must be called from one goroutine at a time.
 func ReadTrace() []byte {
 top: // retry point: control returns here after a park (see goto top below)
+       var buf []byte // chunk produced by readTrace0
+       var park bool // set by readTrace0 when this goroutine should park and then retry
+       systemstack(func() { // readTrace0 acquires trace.lock, so it must run on the system stack
+               buf, park = readTrace0()
+       })
+       if park {
+               gopark(func(gp *g, _ unsafe.Pointer) bool { // callback decides whether the park takes effect
+                       if !trace.reader.CompareAndSwapNoWB(nil, gp) { // register gp as the reader only if none is registered
+                               // We're racing with another reader.
+                               // Wake up and handle this case.
+                               return false
+                       }
+
+                       if g2 := traceReader(); gp == g2 {
+                               // New data arrived between unlocking
+                               // and the CAS and we won the wake-up
+                               // race, so wake up directly.
+                               return false
+                       } else if g2 != nil {
+                               printlock()
+                               println("runtime: got trace reader", g2, g2.goid)
+                               throw("unexpected trace reader") // a different goroutine was handed back as the reader
+                       }
+
+                       return true // traceReader returned nil: nothing to wake, so remain parked
+               }, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
+               goto top // resumed: attempt the read again from the beginning
+       }
+
+       return buf
+}
+
+// readTrace0 is ReadTrace's continuation on g0. This must run on the
+// system stack because it acquires trace.lock.
+//
+//go:systemstack
+func readTrace0() (buf []byte, park bool) {
+       if raceenabled {
+               // g0 doesn't have a race context. Borrow the user G's.
+               if getg().racectx != 0 {
+                       throw("expected racectx == 0")
+               }
+               getg().racectx = getg().m.curg.racectx
+               // (This defer should get open-coded, which is safe on
+               // the system stack.)
+               defer func() { getg().racectx = 0 }()
+       }
+
        // This function may need to lock trace.lock recursively
        // (goparkunlock -> traceGoPark -> traceEvent -> traceFlush).
        // To allow this we use trace.lockOwner.
        // allocation can call heap allocate, which will try to emit a trace
        // event while holding heap lock.
        lock(&trace.lock)
-       trace.lockOwner = getg()
+       trace.lockOwner = getg().m.curg
 
        if trace.reader.Load() != nil {
                // More than one goroutine reads trace. This is bad.
                trace.lockOwner = nil
                unlock(&trace.lock)
                println("runtime: ReadTrace called from multiple goroutines simultaneously")
-               return nil
+               return nil, false
        }
        // Recycle the old buffer.
        if buf := trace.reading; buf != 0 {
                trace.headerWritten = true
                trace.lockOwner = nil
                unlock(&trace.lock)
-               return []byte("go 1.19 trace\x00\x00\x00")
+               return []byte("go 1.19 trace\x00\x00\x00"), false
        }
        // Optimistically look for CPU profile samples. This may write new stack
        // records, and may write new tracing buffers.
                // We don't simply use a note because the scheduler
                // executes this goroutine directly when it wakes up
                // (also a note would consume an M).
+               trace.lockOwner = nil
                unlock(&trace.lock)
-               gopark(func(gp *g, _ unsafe.Pointer) bool {
-                       if !trace.reader.CompareAndSwapNoWB(nil, gp) {
-                               // We're racing with another reader.
-                               // Wake up and handle this case.
-                               return false
-                       }
-
-                       if g2 := traceReader(); gp == g2 {
-                               // New data arrived between unlocking
-                               // and the CAS and we won the wake-up
-                               // race, so wake up directly.
-                               return false
-                       } else if g2 != nil {
-                               printlock()
-                               println("runtime: got trace reader", g2, g2.goid)
-                               throw("unexpected trace reader")
-                       }
-
-                       return true
-               }, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
-               goto top
+               return nil, true
        }
-
 newFull:
        assertLockHeld(&trace.lock)
        // Write a buffer.
                trace.reading = buf
                trace.lockOwner = nil
                unlock(&trace.lock)
-               return buf.ptr().arr[:buf.ptr().pos]
+               return buf.ptr().arr[:buf.ptr().pos], false
        }
 
        // Write footer with timer frequency.
                }
                // trace.enabled is already reset, so can call traceable functions.
                semrelease(&trace.shutdownSema)
-               return nil
+               return nil, false
        }
        // Also bad, but see the comment above.
        trace.lockOwner = nil
        unlock(&trace.lock)
        println("runtime: spurious wakeup of trace reader")
-       return nil
+       return nil, false
 }
 
 // traceReader returns the trace reader that should be woken up, if any.
 // Callers should first check that trace.enabled or trace.shutdown is set.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
 func traceReader() *g {
        // Optimistic check first
        if traceReaderAvailable() == nil {
 }
 
 // traceProcFree frees trace buffer associated with pp.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
 func traceProcFree(pp *p) {
        buf := pp.tracebuf
        pp.tracebuf = 0
        // TODO: test on non-zero extraBytes param.
        maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
        if buf == nil || len(buf.arr)-buf.pos < maxSize {
-               buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+               systemstack(func() {
+                       buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+               })
                bufp.set(buf)
        }
 
 
                        buf := bufp.ptr()
                        if buf == nil {
-                               *bufp = traceFlush(*bufp, 0)
+                               systemstack(func() {
+                                       *bufp = traceFlush(*bufp, 0)
+                               })
                                buf = bufp.ptr()
                        }
                        for i := range stk {
 }
 
 // traceFlush puts buf onto stack of full buffers and returns an empty buffer.
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
 func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
        owner := trace.lockOwner
        dolock := owner == nil || owner != getg().m.curg
        buf := bufp.ptr()
        size := 1 + 2*traceBytesPerNumber + len(s)
        if buf == nil || len(buf.arr)-buf.pos < size {
-               buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
-               bufp.set(buf)
+               systemstack(func() {
+                       buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+                       bufp.set(buf)
+               })
        }
        buf.byte(traceEvString)
        buf.varint(id)
 
 // dump writes all previously cached stacks to trace buffers,
 // releases all memory and resets state.
+//
+// This must run on the system stack because it calls traceFlush.
+//
+//go:systemstack
 func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
        for i, _ := range tab.tab {
                stk := tab.tab[i].ptr()