From 0a820007e70fdd038950f28254c6269cd9588c02 Mon Sep 17 00:00:00 2001
From: Dan Scales
Date: Wed, 13 Nov 2019 17:34:47 -0800
Subject: [PATCH] runtime: static lock ranking for the runtime (enabled by GOEXPERIMENT)

I took some of the infrastructure from Austin's lock logging CR
https://go-review.googlesource.com/c/go/+/192704 (with deadlock detection from
the logs), and developed a setup to give static lock ranking for runtime locks.

Static lock ranking establishes a documented total ordering among locks, and
then reports an error if that order is violated. A violation is reported when
an actual deadlock occurs (a sequence of locks acquired in conflicting orders),
and also when only one side of a possible deadlock happens. Lock-ordering
deadlocks cannot happen as long as the lock ordering is followed.

Along the way, I found a deadlock involving the new timer code, which Ian fixed
via https://go-review.googlesource.com/c/go/+/207348, as well as two other
potential deadlocks.

See the constants at the top of runtime/lockrank.go for the static lock ranking
that I ended up with, along with some comments. This is great documentation of
the current intended lock ordering when acquiring multiple locks in the
runtime. I also added an array, lockPartialOrder[], which shows and enforces
the current partial ordering among locks (which is embedded within the total
ordering). This is more specific about the dependencies among locks. I don't
try to check the ranking within a lock class with multiple locks that can be
acquired at the same time (i.e. I don't check the ranking when multiple hchan
locks are acquired).

Currently, I am doing a lockInit() call to set the lock rank of most locks. Any
lock that is not otherwise initialized is assumed to be a leaf lock (a very
high rank lock), so that eliminates the need to do anything for a bunch of
locks (including all architecture-dependent locks). For two locks, root.lock
and notifyList.lock (only in the runtime/sema.go file), it is not as easy to do
lock initialization, so instead I pass the lock rank with the lock calls.

For Windows compilation, I needed to increase the StackGuard size from 896 to
928 because of the new lock-rank checking functions.

Checking of the static lock ranking is enabled by setting
GOEXPERIMENT=staticlockranking before doing a run.

To make sure that the static lock ranking code has no overhead in memory or CPU
when not enabled by GOEXPERIMENT, I changed 'go build/install' so that it
defines a build tag (with the same name) whenever any experiment has been baked
into the toolchain (by checking Expstring()). This allows me to avoid
increasing the size of the 'mutex' type when static lock ranking is not
enabled.
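For illustration, the checking scheme can be modeled outside the runtime as
follows. This is a minimal, self-contained sketch, not the runtime code in the
diff below: every identifier and the tiny partial-order table in it are
invented for the example. Each lock carries a rank, each M keeps a small list
of the locks it currently holds, and each acquisition checks the most recently
acquired held lock against the partial-order table (falling back to the total
order and the leaf-rank rule).

    // lockrank_sketch.go: a simplified, illustrative model of static lock
    // ranking. This is NOT the runtime implementation; the types and the tiny
    // partial-order table below are invented for the example.
    package main

    import "fmt"

    type lockRank int

    const (
            rankSched lockRank = iota + 1
            rankAllg
            rankMheap
            rankLeaf lockRank = 1000 // rank assumed for locks with no declared rank
    )

    // partialOrder[r] lists the ranks that may already be held when a lock
    // of rank r is acquired (analogous to lockPartialOrder in the patch).
    var partialOrder = map[lockRank][]lockRank{
            rankSched: {},
            rankAllg:  {rankSched},
            rankMheap: {rankSched, rankAllg},
    }

    type rankedMutex struct {
            rank lockRank
            name string
    }

    // held models the per-M list of locks currently held, most recent last
    // (the patch keeps this in m.locksHeld).
    var held []*rankedMutex

    // rankOK reports whether a lock of rank next may be acquired while a lock
    // of rank prev is the most recently acquired lock still held.
    func rankOK(prev, next lockRank) bool {
            if prev > next {
                    return false // total order violated
            }
            if next == rankLeaf {
                    return prev < rankLeaf // a leaf lock may follow anything but another leaf
            }
            for _, r := range partialOrder[next] {
                    if r == prev {
                            return true
                    }
            }
            return false // rank pair not declared in the partial order
    }

    func acquire(l *rankedMutex) {
            if n := len(held); n > 0 {
                    prev := held[n-1]
                    if !rankOK(prev.rank, l.rank) {
                            panic(fmt.Sprintf("lock ordering problem: %s (rank %d) acquired while holding %s (rank %d)",
                                    l.name, l.rank, prev.name, prev.rank))
                    }
            }
            held = append(held, l)
            // The real lock acquisition (lock2 in the patch) would happen here.
    }

    func release(l *rankedMutex) {
            for i := len(held) - 1; i >= 0; i-- {
                    if held[i] == l {
                            held = append(held[:i], held[i+1:]...)
                            // The real unlock (unlock2 in the patch) would happen here.
                            return
                    }
            }
            panic("unlock without matching lock acquire")
    }

    func main() {
            sched := &rankedMutex{rank: rankSched, name: "sched"}
            mheap := &rankedMutex{rank: rankMheap, name: "mheap"}

            acquire(sched)
            acquire(mheap) // OK: sched appears in partialOrder[rankMheap]
            release(mheap)
            release(sched)

            acquire(mheap)
            acquire(sched) // panics: sched ranks below mheap
    }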
Fixes #38029 Change-Id: I154217ff307c47051f8dae9c2a03b53081acd83a Reviewed-on: https://go-review.googlesource.com/c/go/+/207619 Reviewed-by: Dan Scales Reviewed-by: Keith Randall Run-TryBot: Dan Scales TryBot-Result: Gobot Gobot --- src/cmd/internal/objabi/stack.go | 2 +- src/cmd/internal/objabi/util.go | 8 +- src/runtime/chan.go | 1 + src/runtime/export_test.go | 1 + src/runtime/iface.go | 1 + src/runtime/lock_futex.go | 8 ++ src/runtime/lock_js.go | 8 ++ src/runtime/lock_sema.go | 10 +- src/runtime/lockrank.go | 234 +++++++++++++++++++++++++++++++ src/runtime/lockrank_off.go | 26 ++++ src/runtime/lockrank_on.go | 160 +++++++++++++++++++++ src/runtime/malloc.go | 3 + src/runtime/mcentral.go | 1 + src/runtime/mgc.go | 3 + src/runtime/mgcscavenge.go | 1 + src/runtime/mgcsweep.go | 1 + src/runtime/mgcwork.go | 8 ++ src/runtime/mheap.go | 5 + src/runtime/netpoll.go | 2 + src/runtime/proc.go | 17 +++ src/runtime/runtime2.go | 13 ++ src/runtime/rwmutex.go | 10 +- src/runtime/sema.go | 10 +- src/runtime/stack.go | 7 +- src/runtime/trace.go | 1 + src/sync/runtime.go | 10 -- src/sync/runtime2.go | 15 ++ src/sync/runtime2_lockrank.go | 18 +++ test/nosplit.go | 6 +- 29 files changed, 561 insertions(+), 29 deletions(-) create mode 100644 src/runtime/lockrank.go create mode 100644 src/runtime/lockrank_off.go create mode 100644 src/runtime/lockrank_on.go create mode 100644 src/sync/runtime2.go create mode 100644 src/sync/runtime2_lockrank.go diff --git a/src/cmd/internal/objabi/stack.go b/src/cmd/internal/objabi/stack.go index 7320dbf365..05a1d4a4b5 100644 --- a/src/cmd/internal/objabi/stack.go +++ b/src/cmd/internal/objabi/stack.go @@ -18,7 +18,7 @@ const ( ) // Initialize StackGuard and StackLimit according to target system. -var StackGuard = 896*stackGuardMultiplier() + StackSystem +var StackGuard = 928*stackGuardMultiplier() + StackSystem var StackLimit = StackGuard - StackSystem - StackSmall // stackGuardMultiplier returns a multiplier to apply to the default diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go index 4f8ba3d36c..8d05a6b735 100644 --- a/src/cmd/internal/objabi/util.go +++ b/src/cmd/internal/objabi/util.go @@ -152,9 +152,10 @@ func addexp(s string) { } var ( - framepointer_enabled int = 1 - Fieldtrack_enabled int - Preemptibleloops_enabled int + framepointer_enabled int = 1 + Fieldtrack_enabled int + Preemptibleloops_enabled int + Staticlockranking_enabled int ) // Toolchain experiments. @@ -168,6 +169,7 @@ var exper = []struct { {"fieldtrack", &Fieldtrack_enabled}, {"framepointer", &framepointer_enabled}, {"preemptibleloops", &Preemptibleloops_enabled}, + {"staticlockranking", &Staticlockranking_enabled}, } var defaultExpstring = Expstring() diff --git a/src/runtime/chan.go b/src/runtime/chan.go index 1d4599e260..f6f4ffd02e 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -109,6 +109,7 @@ func makechan(t *chantype, size int) *hchan { c.elemsize = uint16(elem.size) c.elemtype = elem c.dataqsiz = uint(size) + lockInit(&c.lock, lockRankHchan) if debugChan { print("makechan: chan=", c, "; elemsize=", elem.size, "; dataqsiz=", size, "\n") diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 67379796c7..4c1150acd4 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -790,6 +790,7 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { // We've got an entry, so initialize the pageAlloc. 
p.init(new(mutex), nil) + lockInit(p.mheapLock, lockRankMheap) p.test = true for i, init := range chunks { diff --git a/src/runtime/iface.go b/src/runtime/iface.go index e4b0b6d3d3..0504b89363 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -243,6 +243,7 @@ imethods: } func itabsinit() { + lockInit(&itabLock, lockRankItab) lock(&itabLock) for _, md := range activeModules() { for _, i := range md.itablinks { diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go index 92873f2dac..b0395d6a69 100644 --- a/src/runtime/lock_futex.go +++ b/src/runtime/lock_futex.go @@ -44,6 +44,10 @@ func key32(p *uintptr) *uint32 { } func lock(l *mutex) { + lockWithRank(l, getLockRank(l)) +} + +func lock2(l *mutex) { gp := getg() if gp.m.locks < 0 { @@ -104,6 +108,10 @@ func lock(l *mutex) { } func unlock(l *mutex) { + lockRankRelease(l) +} + +func unlock2(l *mutex) { v := atomic.Xchg(key32(&l.key), mutex_unlocked) if v == mutex_unlocked { throw("unlock of unlocked lock") diff --git a/src/runtime/lock_js.go b/src/runtime/lock_js.go index 3168c86d8a..7a720f4790 100644 --- a/src/runtime/lock_js.go +++ b/src/runtime/lock_js.go @@ -26,6 +26,10 @@ const ( ) func lock(l *mutex) { + lockWithRank(l, getLockRank(l)) +} + +func lock2(l *mutex) { if l.key == mutex_locked { // js/wasm is single-threaded so we should never // observe this. @@ -40,6 +44,10 @@ func lock(l *mutex) { } func unlock(l *mutex) { + lockRankRelease(l) +} + +func unlock2(l *mutex) { if l.key == mutex_unlocked { throw("unlock of unlocked lock") } diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go index af9517d744..d79520da07 100644 --- a/src/runtime/lock_sema.go +++ b/src/runtime/lock_sema.go @@ -33,6 +33,10 @@ const ( ) func lock(l *mutex) { + lockWithRank(l, getLockRank(l)) +} + +func lock2(l *mutex) { gp := getg() if gp.m.locks < 0 { throw("runtime·lock: lock count") @@ -89,9 +93,13 @@ Loop: } } +func unlock(l *mutex) { + lockRankRelease(l) +} + //go:nowritebarrier // We might not be holding a p in this code. -func unlock(l *mutex) { +func unlock2(l *mutex) { gp := getg() var mp *m for { diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go new file mode 100644 index 0000000000..4b7273aa5f --- /dev/null +++ b/src/runtime/lockrank.go @@ -0,0 +1,234 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file records the static ranks of the locks in the runtime. If a lock +// is not given a rank, then it is assumed to be a leaf lock, which means no other +// lock can be acquired while it is held. Therefore, leaf locks do not need to be +// given an explicit rank. We list all of the architecture-independent leaf locks +// for documentation purposes, but don't list any of the architecture-dependent +// locks (which are all leaf locks). debugLock is ignored for ranking, since it is used +// when printing out lock ranking errors. +// +// lockInit(l *mutex, rank lockRank) is used to set the rank of a lock before it is used. +// If there is no clear place to initialize a lock, then the rank of a lock can be +// specified during the lock call itself via lockWithRank(l *mutex, rank lockRank). +// +// Besides the static lock ranking (which is a total ordering of the locks), we +// also represent and enforce the actual partial order among the locks in the +// lockPartialOrder[] array below.
That is, if it is possible that lock B can be acquired when +// lock A is the previously acquired lock that is still held, then there should be +// an entry for A in lockPartialOrder[B]. We will currently fail not only if the total order +// (the lock ranking) is violated, but also if there is a missing entry in the +// partial order. + +package runtime + +type lockRank int + +// Constants representing the lock rank of the architecture-independent locks in +// the runtime. +const ( + lockRankDummy lockRank = iota + + // Locks held above sched + lockRankScavenge + lockRankForcegc + lockRankSweepWaiters + lockRankAssistQueue + lockRankCpuprof + lockRankSweep + + lockRankSched + lockRankDeadlock + lockRankPanic + lockRankAllg + lockRankAllp + lockRankPollDesc + + lockRankTimers // Multiple timers locked simultaneously in destroy() + lockRankItab + lockRankReflectOffs + lockRankHchan // Multiple hchans acquired in lock order in syncadjustsudogs() + lockRankFin + lockRankNotifyList + lockRankTraceBuf + lockRankTraceStrings + lockRankMspanSpecial + lockRankProf + lockRankGcBitsArenas + lockRankRoot + lockRankTrace + lockRankTraceStackTab + lockRankNetpollInit + + lockRankRwmutexW + lockRankRwmutexR + + lockRankMcentral + lockRankSpine + lockRankStackpool + lockRankStackLarge + lockRankDefer + lockRankSudog + + // Memory-related non-leaf locks + lockRankWbufSpans + lockRankMheap + + // Memory-related leaf locks + lockRankMheapSpecial + lockRankGlobalAlloc + + // Other leaf locks + lockRankGFree + + // Leaf locks with no dependencies, so these constants are not actually used anywhere. + // There are other architecture-dependent leaf locks as well. + lockRankNewmHandoff + lockRankDebugPtrmask + lockRankFaketimeState + lockRankTicks + lockRankRaceFini + lockRankPollCache + lockRankDebug +) + +// lockRankLeafRank is the rank of a lock that does not have a declared rank, and hence is +// a leaf lock.
+const lockRankLeafRank lockRank = 1000 + +// lockNames gives the names associated with each of the above ranks +var lockNames = []string{ + lockRankDummy: "", + + lockRankScavenge: "scavenge", + lockRankForcegc: "forcegc", + lockRankSweepWaiters: "sweepWaiters", + lockRankAssistQueue: "assistQueue", + lockRankCpuprof: "cpuprof", + lockRankSweep: "sweep", + + lockRankSched: "sched", + lockRankDeadlock: "deadlock", + lockRankPanic: "panic", + lockRankAllg: "allg", + lockRankAllp: "allp", + lockRankPollDesc: "pollDesc", + + lockRankTimers: "timers", + lockRankItab: "itab", + lockRankReflectOffs: "reflectOffs", + + lockRankHchan: "hchan", + lockRankFin: "fin", + lockRankNotifyList: "notifyList", + lockRankTraceBuf: "traceBuf", + lockRankTraceStrings: "traceStrings", + lockRankMspanSpecial: "mspanSpecial", + lockRankProf: "prof", + lockRankGcBitsArenas: "gcBitsArenas", + lockRankRoot: "root", + lockRankTrace: "trace", + lockRankTraceStackTab: "traceStackTab", + lockRankNetpollInit: "netpollInit", + + lockRankRwmutexW: "rwmutexW", + lockRankRwmutexR: "rwmutexR", + + lockRankMcentral: "mcentral", + lockRankSpine: "spine", + lockRankStackpool: "stackpool", + lockRankStackLarge: "stackLarge", + lockRankDefer: "defer", + lockRankSudog: "sudog", + + lockRankWbufSpans: "wbufSpans", + lockRankMheap: "mheap", + + lockRankMheapSpecial: "mheapSpecial", + lockRankGlobalAlloc: "globalAlloc.mutex", + + lockRankGFree: "gFree", + + lockRankNewmHandoff: "newmHandoff.lock", + lockRankDebugPtrmask: "debugPtrmask.lock", + lockRankFaketimeState: "faketimeState.lock", + lockRankTicks: "ticks.lock", + lockRankRaceFini: "raceFiniLock", + lockRankPollCache: "pollCache.lock", + lockRankDebug: "debugLock", +} + +func (rank lockRank) String() string { + if rank == 0 { + return "UNKNOWN" + } + if rank == lockRankLeafRank { + return "LEAF" + } + return lockNames[rank] +} + +// lockPartialOrder is a partial order among the various lock types, listing the immediate +// ordering that has actually been observed in the runtime. Each entry (which +// corresponds to a particular lock rank) specifies the list of locks that can already +// be held immediately "above" it. +// +// So, for example, the lockRankSched entry shows that all the locks preceding it in +// rank can actually be held. The fin lock shows that only the sched, allg, timers, or +// hchan locks can be held immediately above it when it is acquired.
+var lockPartialOrder [][]lockRank = [][]lockRank{ + lockRankDummy: {}, + lockRankScavenge: {}, + lockRankForcegc: {}, + lockRankSweepWaiters: {}, + lockRankAssistQueue: {}, + lockRankCpuprof: {}, + lockRankSweep: {}, + lockRankSched: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep}, + lockRankDeadlock: {lockRankDeadlock}, + lockRankPanic: {lockRankDeadlock}, + lockRankAllg: {lockRankSched, lockRankPanic}, + lockRankAllp: {lockRankSched}, + lockRankPollDesc: {}, + lockRankTimers: {lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers}, + lockRankItab: {}, + lockRankReflectOffs: {lockRankItab}, + lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan}, + lockRankFin: {lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan}, + lockRankNotifyList: {}, + lockRankTraceBuf: {}, + lockRankTraceStrings: {lockRankTraceBuf}, + lockRankMspanSpecial: {lockRankScavenge, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings}, + lockRankProf: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, + lockRankGcBitsArenas: {lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, + lockRankRoot: {}, + lockRankTrace: {lockRankScavenge, lockRankAssistQueue, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankSweep}, + lockRankTraceStackTab: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankTimers, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankTrace}, + lockRankNetpollInit: {lockRankTimers}, + + lockRankRwmutexW: {}, + lockRankRwmutexR: {lockRankRwmutexW}, + + lockRankMcentral: {lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, + lockRankSpine: {lockRankScavenge, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, + lockRankStackpool: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine}, + lockRankStackLarge: {lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral}, + lockRankDefer: {}, + lockRankSudog: {lockRankNotifyList, lockRankHchan}, + lockRankWbufSpans: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, 
lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankDefer, lockRankSudog}, + lockRankMheap: {lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans}, + lockRankMheapSpecial: {lockRankScavenge, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, + lockRankGlobalAlloc: {lockRankSpine, lockRankMheap}, + + lockRankGFree: {lockRankSched}, + + lockRankNewmHandoff: {}, + lockRankDebugPtrmask: {}, + lockRankFaketimeState: {}, + lockRankTicks: {}, + lockRankRaceFini: {}, + lockRankPollCache: {}, + lockRankDebug: {}, +} diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go new file mode 100644 index 0000000000..fcfcff57a3 --- /dev/null +++ b/src/runtime/lockrank_off.go @@ -0,0 +1,26 @@ +// +build !goexperiment.staticlockranking + +package runtime + +// lockRankStruct is embedded in mutex, but is empty when staticlockranking is +// disabled (the default) +type lockRankStruct struct { +} + +func lockInit(l *mutex, rank lockRank) { +} + +func getLockRank(l *mutex) lockRank { + return 0 +} + +func lockRankRelease(l *mutex) { + unlock2(l) +} + +func lockWithRank(l *mutex, rank lockRank) { + lock2(l) +} + +func lockWithRankMayAcquire(l *mutex, rank lockRank) { +} diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go new file mode 100644 index 0000000000..fc72a06f6f --- /dev/null +++ b/src/runtime/lockrank_on.go @@ -0,0 +1,160 @@ +// +build goexperiment.staticlockranking + +package runtime + +import ( + "unsafe" +) + +// lockRankStruct is embedded in mutex +type lockRankStruct struct { + // static lock ranking of the lock + rank lockRank + // pad field to make sure lockRankStruct is a multiple of 8 bytes, even on + // 32-bit systems. + pad int +} + +// init checks that the partial order in lockPartialOrder fits within the total +// order determined by the order of the lockRank constants. +func init() { + for rank, list := range lockPartialOrder { + for _, entry := range list { + if entry > lockRank(rank) { + println("lockPartialOrder row", lockRank(rank).String(), "entry", entry.String()) + throw("lockPartialOrder table is inconsistent with total lock ranking order") + } + } + } +} + +func lockInit(l *mutex, rank lockRank) { + l.rank = rank +} + +func getLockRank(l *mutex) lockRank { + return l.rank +} + +// The following functions are the entry-points to record lock +// operations. +// All of these are nosplit and switch to the system stack immediately +// to avoid stack growths. Since a stack growth could itself have lock +// operations, this prevents re-entrant calls. + +// lockWithRank is like lock(l), but allows the caller to specify a lock rank +// when acquiring a non-static lock. +//go:nosplit +func lockWithRank(l *mutex, rank lockRank) { + if l == &debuglock { + // debuglock is only used for println/printlock(). Don't do lock rank + // recording for it, since print/println are used when printing + // out a lock ordering problem below.
+ lock2(l) + return + } + if rank == 0 { + rank = lockRankLeafRank + } + gp := getg() + // Log the new class. + systemstack(func() { + i := gp.m.locksHeldLen + if i >= len(gp.m.locksHeld) { + throw("too many locks held concurrently for rank checking") + } + gp.m.locksHeld[i].rank = rank + gp.m.locksHeld[i].lockAddr = uintptr(unsafe.Pointer(l)) + gp.m.locksHeldLen++ + + // i is the index of the lock being acquired + if i > 0 { + checkRanks(gp, gp.m.locksHeld[i-1].rank, rank) + } + lock2(l) + }) +} + +func checkRanks(gp *g, prevRank, rank lockRank) { + rankOK := false + // If rank < prevRank, then we definitely have a rank error + if prevRank <= rank { + if rank == lockRankLeafRank { + // If new lock is a leaf lock, then the preceding lock can + // be anything except another leaf lock. + rankOK = prevRank < lockRankLeafRank + } else { + // We've already verified the total lock ranking, but we + // also enforce the partial ordering specified by + // lockPartialOrder as well. Two locks with the same rank + // can only be acquired at the same time if explicitly + // listed in the lockPartialOrder table. + list := lockPartialOrder[rank] + for _, entry := range list { + if entry == prevRank { + rankOK = true + break + } + } + } + } + if !rankOK { + printlock() + println(gp.m.procid, " ======") + for j, held := range gp.m.locksHeld[:gp.m.locksHeldLen] { + println(j, ":", held.rank.String(), held.rank, unsafe.Pointer(gp.m.locksHeld[j].lockAddr)) + } + throw("lock ordering problem") + } +} + +//go:nosplit +func lockRankRelease(l *mutex) { + if l == &debuglock { + // debuglock is only used for print/println. Don't do lock rank + // recording for it, since print/println are used when printing + // out a lock ordering problem below. + unlock2(l) + return + } + gp := getg() + systemstack(func() { + found := false + for i := gp.m.locksHeldLen - 1; i >= 0; i-- { + if gp.m.locksHeld[i].lockAddr == uintptr(unsafe.Pointer(l)) { + found = true + copy(gp.m.locksHeld[i:gp.m.locksHeldLen-1], gp.m.locksHeld[i+1:gp.m.locksHeldLen]) + gp.m.locksHeldLen-- + } + } + if !found { + println(gp.m.procid, ":", l.rank.String(), l.rank, l) + throw("unlock without matching lock acquire") + } + unlock2(l) + }) +} + +//go:nosplit +func lockWithRankMayAcquire(l *mutex, rank lockRank) { + gp := getg() + if gp.m.locksHeldLen == 0 { + // No possibility of a lock ordering problem if no other locks are held + return + } + + systemstack(func() { + i := gp.m.locksHeldLen + if i >= len(gp.m.locksHeld) { + throw("too many locks held concurrently for rank checking") + } + // Temporarily add this lock to the locksHeld list, so + // checkRanks() will print out the list, including this lock, if there + // is a lock ordering problem. + gp.m.locksHeld[i].rank = rank + gp.m.locksHeld[i].lockAddr = uintptr(unsafe.Pointer(l)) + gp.m.locksHeldLen++ + checkRanks(gp, gp.m.locksHeld[i-1].rank, rank) + gp.m.locksHeldLen-- + }) +} diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 346d7f4742..5a0d85f645 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -469,6 +469,9 @@ func mallocinit() { // Initialize the heap. mheap_.init() mcache0 = allocmcache() + lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas) + lockInit(&proflock, lockRankProf) + lockInit(&globalAlloc.mutex, lockRankGlobalAlloc) // Create initial arena growth hints.
if sys.PtrSize == 8 { diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index 78a3ae6ac1..fd0035bed5 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -34,6 +34,7 @@ func (c *mcentral) init(spc spanClass) { c.spanclass = spc c.nonempty.init() c.empty.init() + lockInit(&c.lock, lockRankMcentral) } // Allocate a span to use in an mcache. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 7a8ab5314f..08159e219a 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -191,6 +191,9 @@ func gcinit() { work.startSema = 1 work.markDoneSema = 1 + lockInit(&work.sweepWaiters.lock, lockRankSweepWaiters) + lockInit(&work.assistQueue.lock, lockRankAssistQueue) + lockInit(&work.wbufSpans.lock, lockRankWbufSpans) } func readgogc() int32 { diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index c2625095f6..5a85505ca4 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -225,6 +225,7 @@ func scavengeSleep(ns int64) int64 { func bgscavenge(c chan int) { scavenge.g = getg() + lockInit(&scavenge.lock, lockRankScavenge) lock(&scavenge.lock) scavenge.parked = true diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index fd9bf8f864..c075f66b12 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -64,6 +64,7 @@ func finishsweep_m() { func bgsweep(c chan int) { sweep.g = getg() + lockInit(&sweep.lock, lockRankSweep) lock(&sweep.lock) sweep.parked = true c <- 1 diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index 927b06c3f9..46101657d5 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -178,6 +178,10 @@ func (w *gcWork) put(obj uintptr) { flushed := false wbuf := w.wbuf1 + // Record that this may acquire the wbufSpans or heap lock to + // allocate a workbuf. + lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans) + lockWithRankMayAcquire(&mheap_.lock, lockRankMheap) if wbuf == nil { w.init() wbuf = w.wbuf1 @@ -423,6 +427,10 @@ func getempty() *workbuf { b.checkempty() } } + // Record that this may acquire the wbufSpans or heap lock to + // allocate a workbuf. + lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans) + lockWithRankMayAcquire(&mheap_.lock, lockRankMheap) if b == nil { // Allocate more workbufs. var s *mspan diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 86ecf3377d..9774dfb282 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -670,6 +670,10 @@ func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) // Initialize the heap. 
func (h *mheap) init() { + lockInit(&h.lock, lockRankMheap) + lockInit(&h.sweepSpans[0].spineLock, lockRankSpine) + lockInit(&h.sweepSpans[1].spineLock, lockRankSpine) + h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) @@ -1474,6 +1478,7 @@ func (span *mspan) init(base uintptr, npages uintptr) { span.allocBits = nil span.gcmarkBits = nil span.state.set(mSpanDead) + lockInit(&span.speciallock, lockRankMspanSpecial) } func (span *mspan) inList() bool { diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index a332045342..34ea82a7fa 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -116,6 +116,7 @@ func poll_runtime_pollServerInit() { func netpollGenericInit() { if atomic.Load(&netpollInited) == 0 { + lockInit(&netpollInitLock, lockRankNetpollInit) lock(&netpollInitLock) if netpollInited == 0 { netpollinit() @@ -542,6 +543,7 @@ func (c *pollCache) alloc() *pollDesc { } pd := c.first c.first = pd.link + lockInit(&pd.lock, lockRankPollDesc) unlock(&c.lock) return pd } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index c7097e2906..202c300e41 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -245,6 +245,7 @@ func init() { func forcegchelper() { forcegc.g = getg() + lockInit(&forcegc.lock, lockRankForcegc) for { lock(&forcegc.lock) if forcegc.idle != 0 { @@ -531,6 +532,21 @@ func cpuinit() { // // The new G calls runtime·main. func schedinit() { + lockInit(&sched.lock, lockRankSched) + lockInit(&sched.deferlock, lockRankDefer) + lockInit(&sched.sudoglock, lockRankSudog) + lockInit(&deadlock, lockRankDeadlock) + lockInit(&paniclk, lockRankPanic) + lockInit(&allglock, lockRankAllg) + lockInit(&allpLock, lockRankAllp) + lockInit(&reflectOffs.lock, lockRankReflectOffs) + lockInit(&finlock, lockRankFin) + lockInit(&trace.bufLock, lockRankTraceBuf) + lockInit(&trace.stringsLock, lockRankTraceStrings) + lockInit(&trace.lock, lockRankTrace) + lockInit(&cpuprof.lock, lockRankCpuprof) + lockInit(&trace.stackTab.lock, lockRankTraceStackTab) + // raceinit must be the first call to race detector. // In particular, it must be done before mallocinit below calls racemapshadow. _g_ := getg() @@ -4120,6 +4136,7 @@ func (pp *p) init(id int32) { pp.raceprocctx = raceproccreate() } } + lockInit(&pp.timersLock, lockRankTimers) } // destroy releases all of the resources associated with pp and diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 1a98927647..15e24c8175 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -158,7 +158,10 @@ const ( // as fast as spin locks (just a few user-level instructions), // but on the contention path they sleep in the kernel. // A zeroed Mutex is unlocked (no need to initialize each lock). +// Initialization is helpful for static lock ranking, but not required. type mutex struct { + // Empty struct if lock ranking is disabled, otherwise includes the lock rank + lockRankStruct // Futex-based impl treats it as uint32 key, // while sema-based impl as M* waitm. // Used to be a union, but unions break precise GC. @@ -392,6 +395,12 @@ type stack struct { hi uintptr } +// heldLockInfo gives info on a held lock and the rank of that lock +type heldLockInfo struct { + lockAddr uintptr + rank lockRank +} + type g struct { // Stack parameters. 
// stack describes the actual stack memory: [stack.lo, stack.hi). @@ -546,6 +555,10 @@ type m struct { dlogPerM mOS + + // Up to 10 locks held by this m, maintained by the lock ranking code. + locksHeldLen int + locksHeld [10]heldLockInfo } type p struct { diff --git a/src/runtime/rwmutex.go b/src/runtime/rwmutex.go index a6da4c979b..7713c3f1cc 100644 --- a/src/runtime/rwmutex.go +++ b/src/runtime/rwmutex.go @@ -39,7 +39,7 @@ func (rw *rwmutex) rlock() { if int32(atomic.Xadd(&rw.readerCount, 1)) < 0 { // A writer is pending. Park on the reader queue. systemstack(func() { - lock(&rw.rLock) + lockWithRank(&rw.rLock, lockRankRwmutexR) if rw.readerPass > 0 { // Writer finished. rw.readerPass -= 1 @@ -67,7 +67,7 @@ func (rw *rwmutex) runlock() { // A writer is pending. if atomic.Xadd(&rw.readerWait, -1) == 0 { // The last reader unblocks the writer. - lock(&rw.rLock) + lockWithRank(&rw.rLock, lockRankRwmutexR) w := rw.writer.ptr() if w != nil { notewakeup(&w.park) @@ -81,12 +81,12 @@ func (rw *rwmutex) runlock() { // lock locks rw for writing. func (rw *rwmutex) lock() { // Resolve competition with other writers and stick to our P. - lock(&rw.wLock) + lockWithRank(&rw.wLock, lockRankRwmutexW) m := getg().m // Announce that there is a pending writer. r := int32(atomic.Xadd(&rw.readerCount, -rwmutexMaxReaders)) + rwmutexMaxReaders // Wait for any active readers to complete. - lock(&rw.rLock) + lockWithRank(&rw.rLock, lockRankRwmutexR) if r != 0 && atomic.Xadd(&rw.readerWait, r) != 0 { // Wait for reader to wake us up. systemstack(func() { @@ -108,7 +108,7 @@ func (rw *rwmutex) unlock() { throw("unlock of unlocked rwmutex") } // Unblock blocked readers. - lock(&rw.rLock) + lockWithRank(&rw.rLock, lockRankRwmutexR) for rw.readers.ptr() != nil { reader := rw.readers.ptr() rw.readers = reader.schedlink diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 9bfd4f96d5..f94c1aa891 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -129,7 +129,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes i s.acquiretime = t0 } for { - lock(&root.lock) + lockWithRank(&root.lock, lockRankRoot) // Add ourselves to nwait to disable "easy case" in semrelease. atomic.Xadd(&root.nwait, 1) // Check cansemacquire to avoid missed wakeup. @@ -168,7 +168,7 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) { } // Harder case: search for a waiter and wake it. - lock(&root.lock) + lockWithRank(&root.lock, lockRankRoot) if atomic.Load(&root.nwait) == 0 { // The count is already consumed by another goroutine, // so no need to wake up another goroutine. @@ -486,7 +486,7 @@ func notifyListAdd(l *notifyList) uint32 { // notifyListAdd was called, it returns immediately. Otherwise, it blocks. //go:linkname notifyListWait sync.runtime_notifyListWait func notifyListWait(l *notifyList, t uint32) { - lock(&l.lock) + lockWithRank(&l.lock, lockRankNotifyList) // Return right away if this ticket has already been notified. if less(t, l.notify) { @@ -528,7 +528,7 @@ func notifyListNotifyAll(l *notifyList) { // Pull the list out into a local variable, waiters will be readied // outside the lock. - lock(&l.lock) + lockWithRank(&l.lock, lockRankNotifyList) s := l.head l.head = nil l.tail = nil @@ -558,7 +558,7 @@ func notifyListNotifyOne(l *notifyList) { return } - lock(&l.lock) + lockWithRank(&l.lock, lockRankNotifyList) // Re-check under the lock if we need to do anything. 
t := l.notify diff --git a/src/runtime/stack.go b/src/runtime/stack.go index e72a75cdef..b5efac0117 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -91,7 +91,7 @@ const ( // The stack guard is a pointer this many bytes above the // bottom of the stack. - _StackGuard = 896*sys.StackGuardMultiplier + _StackSystem + _StackGuard = 928*sys.StackGuardMultiplier + _StackSystem // After a stack split check the SP is allowed to be this // many bytes below the stack guard. This saves an instruction @@ -161,9 +161,11 @@ func stackinit() { } for i := range stackpool { stackpool[i].item.span.init() + lockInit(&stackpool[i].item.mu, lockRankStackpool) } for i := range stackLarge.free { stackLarge.free[i].init() + lockInit(&stackLarge.lock, lockRankStackLarge) } } @@ -182,6 +184,7 @@ func stacklog2(n uintptr) int { func stackpoolalloc(order uint8) gclinkptr { list := &stackpool[order].item.span s := list.first + lockWithRankMayAcquire(&mheap_.lock, lockRankMheap) if s == nil { // no free stacks. Allocate another span worth. s = mheap_.allocManual(_StackCacheSize>>_PageShift, &memstats.stacks_inuse) @@ -389,6 +392,8 @@ func stackalloc(n uint32) stack { } unlock(&stackLarge.lock) + lockWithRankMayAcquire(&mheap_.lock, lockRankMheap) + if s == nil { // Allocate a new stack from the heap. s = mheap_.allocManual(npage, &memstats.stacks_inuse) diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 9aa9facabe..33062daa46 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -869,6 +869,7 @@ func (tab *traceStackTable) dump() { tab.mem.drop() *tab = traceStackTable{} + lockInit(&((*tab).lock), lockRankTraceStackTab) } type traceFrame struct { diff --git a/src/sync/runtime.go b/src/sync/runtime.go index 3ad44e786f..de2b0a3ccd 100644 --- a/src/sync/runtime.go +++ b/src/sync/runtime.go @@ -28,16 +28,6 @@ func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int) // runtime_Semrelease's caller. func runtime_Semrelease(s *uint32, handoff bool, skipframes int) -// Approximation of notifyList in runtime/sema.go. Size and alignment must -// agree. -type notifyList struct { - wait uint32 - notify uint32 - lock uintptr - head unsafe.Pointer - tail unsafe.Pointer -} - // See runtime/sema.go for documentation. func runtime_notifyListAdd(l *notifyList) uint32 diff --git a/src/sync/runtime2.go b/src/sync/runtime2.go new file mode 100644 index 0000000000..931edad9f1 --- /dev/null +++ b/src/sync/runtime2.go @@ -0,0 +1,15 @@ +// +build !goexperiment.staticlockranking + +package sync + +import "unsafe" + +// Approximation of notifyList in runtime/sema.go. Size and alignment must +// agree. +type notifyList struct { + wait uint32 + notify uint32 + lock uintptr // key field of the mutex + head unsafe.Pointer + tail unsafe.Pointer +} diff --git a/src/sync/runtime2_lockrank.go b/src/sync/runtime2_lockrank.go new file mode 100644 index 0000000000..5a68e901fa --- /dev/null +++ b/src/sync/runtime2_lockrank.go @@ -0,0 +1,18 @@ +// +build goexperiment.staticlockranking + +package sync + +import "unsafe" + +// Approximation of notifyList in runtime/sema.go. Size and alignment must +// agree. 
+type notifyList struct { + wait uint32 + notify uint32 + rank int // rank field of the mutex + pad int // pad field of the mutex + lock uintptr // key field of the mutex + + head unsafe.Pointer + tail unsafe.Pointer +} diff --git a/test/nosplit.go b/test/nosplit.go index ad19d8a2b5..a3f2a9fb7e 100644 --- a/test/nosplit.go +++ b/test/nosplit.go @@ -312,17 +312,17 @@ TestCases: name := m[1] size, _ := strconv.Atoi(m[2]) - // The limit was originally 128 but is now 768 (896-128). + // The limit was originally 128 but is now 800 (928-128). // Instead of rewriting the test cases above, adjust // the first stack frame to use up the extra bytes. if i == 0 { - size += (896 - 128) - 128 + size += (928 - 128) - 128 // Noopt builds have a larger stackguard. // See ../src/cmd/dist/buildruntime.go:stackGuardMultiplier // This increase is included in objabi.StackGuard for _, s := range strings.Split(os.Getenv("GO_GCFLAGS"), " ") { if s == "-N" { - size += 896 + size += 928 } } } -- 2.48.1
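A note on lockWithRankMayAcquire, which the patch adds at allocation sites in
mgcwork.go and stack.go: it records that a lock might be acquired on the
current path (for example mheap_.lock when a new workbuf or stack span has to
be allocated), so that an ordering violation is reported even on runs where
the allocation does not actually happen. In terms of the illustrative sketch
shown after the commit message above (reusing its invented held, rankedMutex,
and rankOK names), it is roughly a check-only probe:

    // mayAcquire is an illustrative analogue of lockWithRankMayAcquire from
    // the patch: it checks that acquiring l now would be legal given the
    // locks currently held, but does not actually take l.
    func mayAcquire(l *rankedMutex) {
            if n := len(held); n > 0 {
                    prev := held[n-1]
                    if !rankOK(prev.rank, l.rank) {
                            panic(fmt.Sprintf("lock ordering problem (may-acquire): %s (rank %d) while holding %s (rank %d)",
                                    l.name, l.rank, prev.name, prev.rank))
                    }
            }
    }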