The current cas64 definition hard-codes the x86 behavior
of updating *old with the new value when the cas fails.
This is inconsistent with cas32 and casp.
Make it consistent.
This means that the cas64 uses will be epsilon less efficient
than they might be, because they have to do an unnecessary
memory load on x86. But so be it. Code clarity and consistency
are more important.
R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/10909045
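
To make the tradeoff concrete, here is a minimal standalone sketch in plain C11 (not runtime code; the names are made up) of the two calling conventions and the retry loops they lead to:

	#include <stdatomic.h>
	#include <stdint.h>

	/* Old style: on failure the value actually observed in memory is written
	   back through *old, which x86 CMPXCHG provides for free in AX. */
	static _Bool cas64_writeback(_Atomic uint64_t *addr, uint64_t *old, uint64_t new)
	{
		return atomic_compare_exchange_strong(addr, old, new);
	}

	/* New style: old is passed by value and nothing is written back,
	   matching cas32 and casp. */
	static _Bool cas64_byvalue(_Atomic uint64_t *addr, uint64_t old, uint64_t new)
	{
		return atomic_compare_exchange_strong(addr, &old, new);
	}

	/* The corresponding xadd loops differ only in where the reload happens. */
	static uint64_t xadd64_writeback(_Atomic uint64_t *addr, uint64_t v)
	{
		uint64_t old = atomic_load(addr);
		while(!cas64_writeback(addr, &old, old+v))
			;	/* old already holds the freshly observed value */
		return old+v;
	}

	static uint64_t xadd64_byvalue(_Atomic uint64_t *addr, uint64_t v)
	{
		uint64_t old;
		do
			old = atomic_load(addr);	/* the extra load per retry */
		while(!cas64_byvalue(addr, old, old+v));
		return old+v;
	}

The by-value form pays one extra load per failed CAS; the first iteration costs the same either way.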
// *val = new;
// return 1;
// } else {
-// *old = *val
// return 0;
// }
TEXT runtime·cas64(SB), 7, $0
MOVQ 8(SP), BX
- MOVQ 16(SP), BP
- MOVQ 0(BP), AX
+ MOVQ 16(SP), AX
MOVQ 24(SP), CX
LOCK
CMPXCHGQ CX, 0(BX)
JNZ cas64_fail
MOVL $1, AX
RET
cas64_fail:
- MOVQ AX, 0(BP)
MOVL $0, AX
RET
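
For reference, the deleted MOVQ AX, 0(BP) existed because CMPXCHG leaves the value it actually found at 0(BX) in AX when the compare fails; the old API stored that back through *old, and the new API simply discards it. GCC/Clang expose the same shape as a legacy builtin; a sketch (not runtime code):

	#include <stdint.h>

	/* Returns whatever was in *addr before the operation; the swap happened
	   iff the return value equals old. This mirrors what CMPXCHG leaves in RAX. */
	static uint64_t cas64_observed(uint64_t *addr, uint64_t old, uint64_t new)
	{
		return __sync_val_compare_and_swap(addr, old, new);
	}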
{
uint64 old;
- old = *addr;
- while(!runtime·cas64(addr, &old, old+v)) {
- // nothing
- }
+ do
+ old = *addr;
+ while(!runtime·cas64(addr, old, old+v));
+
return old+v;
}
{
uint64 old;
- old = *addr;
- while(!runtime·cas64(addr, &old, v)) {
- // nothing
- }
+ do
+ old = *addr;
+ while(!runtime·cas64(addr, old, v));
+
return old;
}
#pragma textflag 7
bool
-runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new)
+runtime·cas64(uint64 volatile *addr, uint64 old, uint64 new)
{
bool res;
runtime·lock(LOCK(addr));
- if(*addr == *old) {
+ if(*addr == old) {
*addr = new;
res = true;
} else {
- *old = *addr;
res = false;
}
runtime·unlock(LOCK(addr));
node->pushcnt++;
new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
- old = runtime·atomicload64(head);
for(;;) {
+ old = runtime·atomicload64(head);
node->next = (LFNode*)(uintptr)(old&PTR_MASK);
- if(runtime·cas64(head, &old, new))
+ if(runtime·cas64(head, old, new))
break;
}
}
LFNode *node, *node2;
uint64 old, new;
- old = runtime·atomicload64(head);
for(;;) {
+ old = runtime·atomicload64(head);
if(old == 0)
return nil;
node = (LFNode*)(uintptr)(old&PTR_MASK);
node2 = runtime·atomicloadp(&node->next);
new = 0;
if(node2 != nil)
new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS);
- if(runtime·cas64(head, &old, new))
+ if(runtime·cas64(head, old, new))
return node;
}
}
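
The lfstack hunks show the other half of the pattern: the 64-bit head packs a node pointer with a push counter, so a single cas64 both swings the pointer and detects ABA. A self-contained sketch of the push side in C11 atomics (the bit layout and names here are illustrative, not the runtime's):

	#include <stdatomic.h>
	#include <stdint.h>

	enum { PTR_BITS = 48 };			/* assumed: pointers fit in 48 bits */
	#define PTR_MASK ((1ULL<<PTR_BITS)-1)
	#define CNT_MASK ((1ULL<<(64-PTR_BITS))-1)

	typedef struct Node Node;
	struct Node {
		Node		*next;
		uint64_t	pushcnt;	/* bumped on every push; defeats ABA */
	};

	static void lfpush(_Atomic uint64_t *head, Node *node)
	{
		uint64_t old, expect, tagged;

		node->pushcnt++;
		tagged = (uint64_t)(uintptr_t)node | ((node->pushcnt&CNT_MASK)<<PTR_BITS);
		do {
			old = atomic_load(head);
			node->next = (Node*)(uintptr_t)(old&PTR_MASK);
			expect = old;	/* C11 CAS writes back on failure; scratch copy keeps the by-value shape */
		} while(!atomic_compare_exchange_strong(head, &expect, tagged));
	}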
if(victim >= tid)
victim++;
victimpos = &desc->thr[victim].pos;
- pos = runtime·atomicload64(victimpos);
for(;;) {
// See if it has any work.
+ pos = runtime·atomicload64(victimpos);
begin = (uint32)pos;
end = (uint32)(pos>>32);
if(begin+1 >= end) {
}
begin2 = begin + (end-begin)/2;
newpos = (uint64)begin | (uint64)begin2<<32;
- if(runtime·cas64(victimpos, &pos, newpos)) {
+ if(runtime·cas64(victimpos, pos, newpos)) {
begin = begin2;
break;
}
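
The parfor hunk uses the same packing trick: a victim's [begin, end) range lives in one uint64 (begin in the low 32 bits, end in the high 32 bits) and a thief claims the top half with a single cas64. A standalone sketch (C11 atomics, hypothetical helper name):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Try to steal the upper half of the victim's range; on success the thief
	   owns [*sbegin, *send). On a CAS race the caller simply retries. */
	static bool steal_half(_Atomic uint64_t *victimpos, uint32_t *sbegin, uint32_t *send)
	{
		uint64_t pos, newpos;
		uint32_t begin, end, begin2;

		pos = atomic_load(victimpos);
		begin = (uint32_t)pos;
		end = (uint32_t)(pos>>32);
		if(begin+1 >= end)
			return false;			/* nothing worth stealing */
		begin2 = begin + (end-begin)/2;
		newpos = (uint64_t)begin | (uint64_t)begin2<<32;	/* victim keeps [begin, begin2) */
		if(!atomic_compare_exchange_strong(victimpos, &pos, newpos))
			return false;
		*sbegin = begin2;			/* thief takes [begin2, end) */
		*send = end;
		return true;
	}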
z64 = 42;
x64 = 0;
PREFETCH(&z64);
- if(runtime·cas64(&z64, &x64, 1))
+ if(runtime·cas64(&z64, x64, 1))
runtime·throw("cas64 failed");
- if(x64 != 42)
+ if(x64 != 0)
runtime·throw("cas64 failed");
- if(!runtime·cas64(&z64, &x64, 1))
+ x64 = 42;
+ if(!runtime·cas64(&z64, x64, 1))
runtime·throw("cas64 failed");
if(x64 != 42 || z64 != 1)
runtime·throw("cas64 failed");
uint64 h;
float32 i, i1;
float64 j, j1;
- void* k;
+ byte *k, *k1;
uint16* l;
struct x1 {
byte x;
if(z != 4)
runtime·throw("cas4");
+ k = (byte*)0xfedcb123;
+ if(sizeof(void*) == 8)
+ k = (byte*)((uintptr)k<<10);
+ if(runtime·casp((void**)&k, nil, nil))
+ runtime·throw("casp1");
+ k1 = k+1;
+ if(!runtime·casp((void**)&k, k, k1))
+ runtime·throw("casp2");
+ if(k != k1)
+ runtime·throw("casp3");
+
*(uint64*)&j = ~0ULL;
if(j == j)
runtime·throw("float64nan");
int32 runtime·close(int32);
int32 runtime·mincore(void*, uintptr, byte*);
bool runtime·cas(uint32*, uint32, uint32);
-bool runtime·cas64(uint64*, uint64*, uint64);
+bool runtime·cas64(uint64*, uint64, uint64);
bool runtime·casp(void**, void*, void*);
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.