typedef struct Sema Sema;
struct Sema
{
- uint32 *addr;
+ uint32 volatile *addr; // semaphore word this waiter is queued on; semrelease compares it against its own addr
G *g;
Sema *prev;
Sema *next;
};
-// TODO: For now, a linked list; maybe a hash table of linked lists later.
-static Sema *semfirst, *semlast;
-static Lock semlock;
+typedef struct SemaRoot SemaRoot;
+struct SemaRoot
+{
+ Lock; // embedded anonymously: semacquire/semrelease pass the SemaRoot itself to runtime·lock/unlock
+ Sema *head; // first queued waiter; semrelease scans the list starting here
+ Sema *tail; // last queued waiter; semqueue appends after it
+ // Number of waiters. Read without the lock by semrelease's easy case; always updated with runtime·xadd.
+ uint32 volatile nwait;
+};
+
+// Number of buckets in semtable. Prime to not correlate with any user patterns.
+#define SEMTABLESZ 251
+
+static union
+{
+ SemaRoot; // the bucket itself, embedded anonymously (semroot hands out a table entry as a SemaRoot*)
+ // Modern processors tend to have 64-byte cache lines,
+ // potentially with 128-byte effective cache line size for reading.
+ // While there are hypothetical architectures
+ // with 16-4096 byte cache lines, 128 looks like a good compromise.
+ uint8 pad[128]; // never accessed in the visible code; makes each table entry at least 128 bytes
+} semtable[SEMTABLESZ];
+
+static SemaRoot* // returns the semtable bucket that serves addr
+semroot(uint32 *addr) // NOTE(review): callers hold a uint32 volatile*; the qualifier is dropped at this call boundary
+{
+ return &semtable[((uintptr)addr >> 3) % SEMTABLESZ]; // discard the low 3 address bits, then reduce modulo the table size
+}
static void
-semqueue(uint32 *addr, Sema *s)
+semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s) // appends s to the tail of root's waiter list; takes no lock itself (semacquire calls it with root locked)
{
+ s->g = g; // remember the current goroutine so semrelease can pass it to runtime·ready
s->addr = addr;
- s->g = nil;
-
- runtime·lock(&semlock);
- s->prev = semlast;
s->next = nil;
- if(semlast)
- semlast->next = s;
+ s->prev = root->tail; // nil when the list is empty
+ if(root->tail)
+ root->tail->next = s;
else
- semfirst = s;
- semlast = s;
- runtime·unlock(&semlock);
+ root->head = s; // empty list: s becomes the head as well
+ root->tail = s;
}
static void
-semdequeue(Sema *s)
+semdequeue(SemaRoot *root, Sema *s) // unlinks s from root's waiter list; takes no lock itself (semrelease calls it with root locked)
{
- runtime·lock(&semlock);
if(s->next)
s->next->prev = s->prev;
else
- semlast = s->prev;
+ root->tail = s->prev; // s was the last waiter
if(s->prev)
s->prev->next = s->next;
else
- semfirst = s->next;
+ root->head = s->next; // s was the first waiter
s->prev = nil;
s->next = nil;
- runtime·unlock(&semlock);
-}
-
-static void
-semwakeup(uint32 *addr)
-{
- Sema *s;
-
- runtime·lock(&semlock);
- for(s=semfirst; s; s=s->next) {
- if(s->addr == addr && s->g) {
- runtime·ready(s->g);
- s->g = nil;
- break;
- }
- }
- runtime·unlock(&semlock);
-}
-
-// Step 1 of sleep: make ourselves available for wakeup.
-// TODO(rsc): Maybe we can write a version without
-// locks by using cas on s->g. Maybe not: I need to
-// think more about whether it would be correct.
-static void
-semsleep1(Sema *s)
-{
- runtime·lock(&semlock);
- s->g = g;
- runtime·unlock(&semlock);
-}
-
-// Decided not to go through with it: undo step 1.
-static void
-semsleepundo1(Sema *s)
-{
- runtime·lock(&semlock);
- if(s->g != nil) {
- s->g = nil; // back ourselves out
- } else {
- // If s->g == nil already, semwakeup
- // already readied us. Since we never stopped
- // running, readying us just set g->readyonstop.
- // Clear it.
- if(g->readyonstop == 0)
- *(int32*)0x555 = 555;
- g->readyonstop = 0;
- }
- runtime·unlock(&semlock);
-}
-
-// Step 2: wait for the wakeup.
-static void
-semsleep2(Sema *s)
-{
- USED(s);
- g->status = Gwaiting;
- runtime·gosched();
}
static int32
{
uint32 v;
- while((v = *addr) > 0)
+ while((v = runtime·atomicload(addr)) > 0)
if(runtime·cas(addr, v, v-1))
return 1;
return 0;
}
-// For now has no return value.
-// Might return an ok (not interrupted) bool in the future?
void
-runtime·semacquire(uint32 *addr)
+runtime·semacquire(uint32 volatile *addr) // returns only once a cansemacquire(addr) call has succeeded; queues and sleeps in between
{
Sema s;
+ SemaRoot *root; // bucket of semtable that serves addr
// Easy case.
if(cansemacquire(addr))
return;
// Harder case:
- // queue
- // try semacquire one more time, sleep if failed
- // dequeue
- // wake up one more guy to avoid races (TODO(rsc): maybe unnecessary?)
- semqueue(addr, &s);
+ // increment waiter count
+ // try cansemacquire one more time, return if succeeded
+ // enqueue itself as a waiter
+ // sleep
+ // (waiter descriptor is dequeued by signaler)
+ root = semroot(addr);
for(;;) {
- semsleep1(&s);
+ runtime·lock(root);
+ // Add ourselves to nwait to disable "easy case" in semrelease.
+ runtime·xadd(&root->nwait, 1);
+ // Check cansemacquire to avoid missed wakeup.
if(cansemacquire(addr)) {
- semsleepundo1(&s);
- break;
+ runtime·xadd(&root->nwait, -1); // not going to wait after all: undo the increment above
+ runtime·unlock(root);
+ return;
}
- semsleep2(&s);
+ // Any semrelease after the cansemacquire knows we're waiting
+ // (we set nwait above), so go to sleep.
+ semqueue(root, addr, &s);
+ g->status = Gwaiting; // set while root is still locked, before semrelease can find s in the list
+ runtime·unlock(root);
+ runtime·gosched(); // presumably returns after semrelease has called runtime·ready on us; confirm against the scheduler
+ if(cansemacquire(addr)) // retry after the wakeup; on failure the loop re-counts and re-queues us
+ return;
}
- semdequeue(&s);
- semwakeup(addr);
}
void
-runtime·semrelease(uint32 *addr)
+runtime·semrelease(uint32 volatile *addr) // adds 1 to *addr and readies at most one goroutine queued on addr
{
- uint32 v;
+ Sema *s; // waiter chosen for wakeup; nil if the scan finds none for addr
+ SemaRoot *root; // bucket of semtable that serves addr
- for(;;) {
- v = *addr;
- if(runtime·cas(addr, v, v+1))
+ root = semroot(addr);
+ runtime·xadd(addr, 1);
+
+ // Easy case: no waiters?
+ // This check must happen after the xadd, to avoid a missed wakeup
+ // (see loop in semacquire).
+ if(runtime·atomicload(&root->nwait) == 0)
+ return;
+
+ // Harder case: search for a waiter and wake it.
+ runtime·lock(root);
+ if(runtime·atomicload(&root->nwait) == 0) {
+ // The count is already consumed by another goroutine,
+ // so no need to wake up another goroutine.
+ runtime·unlock(root);
+ return;
+ }
+ for(s = root->head; s; s = s->next) {
+ if(s->addr == addr) { // a bucket can hold waiters for several addresses; pick the first one for ours
+ runtime·xadd(&root->nwait, -1); // the waiter being woken no longer counts
+ semdequeue(root, s); // unlink while root is still locked
break;
+ }
}
- semwakeup(addr);
+ runtime·unlock(root);
+ if(s)
+ runtime·ready(s->g); // called after root has been unlocked
}
func Semacquire(addr *uint32) {