thechar = '8',
CacheLineSize = 64
};
+
+// Prefetches *addr into the processor's cache.
+#define PREFETCH(addr) runtime·prefetch(addr)
+void runtime·prefetch(void*);
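+// Ports without a usable prefetch instruction define PREFETCH as a no-op,
+// so callers may use it unconditionally.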
thechar = '6',
CacheLineSize = 64
};
+
+// Prefetches *addr into the processor's cache.
+#define PREFETCH(addr) runtime·prefetch(addr)
+void runtime·prefetch(void*);
thechar = '5',
CacheLineSize = 32
};
+
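+// There is no prefetch support on this port; USED(addr) only silences the
+// unused-variable diagnostic, so PREFETCH is effectively a no-op here.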
+#define PREFETCH(addr) USED(addr)
MOVL $1, AX
RET
+// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// *old = *val;
+// return 0;
+// }
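+//
+// Implemented with LOCK CMPXCHG8B: DX:AX holds *old and CX:BX holds new.
+// On failure CMPXCHG8B leaves the current memory value in DX:AX, which is
+// written back to *old before returning 0.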
+TEXT runtime·cas64(SB), 7, $0
+ MOVL 4(SP), BP
+ MOVL 8(SP), SI
+ MOVL 0(SI), AX
+ MOVL 4(SI), DX
+ MOVL 12(SP), BX
+ MOVL 16(SP), CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ cas64_fail
+ MOVL $1, AX
+ RET
+cas64_fail:
+ MOVL AX, 0(SI)
+ MOVL DX, 4(SI)
+ XORL AX, AX
+ RET
+
// bool casp(void **p, void *old, void *new)
// Atomically:
// if(*p == old){
XCHGL AX, 0(BX)
RET
+// uint64 atomicload64(uint64 volatile* addr);
+// The compiler returns a uint64 result through a hidden pointer passed as
+// the first argument, so the actual signature is
+// void atomicload64(uint64 *res, uint64 volatile *addr);
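+//
+// An aligned quadword MOVQ through an MMX register is a single atomic
+// 8-byte memory access, so no LOCK CMPXCHG8B is needed for the load.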
+TEXT runtime·atomicload64(SB), 7, $0
+ MOVL 4(SP), BX  // BX = res (pointer to the result)
+ MOVL 8(SP), AX  // AX = addr
+ // MOVQ (%EAX), %MM0
+ BYTE $0x0f; BYTE $0x6f; BYTE $0x00
+ // MOVQ %MM0, 0(%EBX)
+ BYTE $0x0f; BYTE $0x7f; BYTE $0x03
+ // EMMS
+ BYTE $0x0F; BYTE $0x77
+ RET
+
+// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
+TEXT runtime·atomicstore64(SB), 7, $0
+ MOVL 4(SP), AX
+ // MOVQ and EMMS were introduced on the Pentium MMX.
+ // MOVQ 0x8(%ESP), %MM0
+ BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
+ // MOVQ %MM0, (%EAX)
+ BYTE $0x0f; BYTE $0x7f; BYTE $0x00
+ // EMMS
+ BYTE $0x0F; BYTE $0x77
+ // This is essentially a no-op, but it provides the required memory fencing:
+ // a LOCKed instruction acts as a full memory barrier.
+ // It could be replaced with MFENCE, but MFENCE was introduced only with SSE2 (Pentium 4).
+ MOVL $0, AX
+ LOCK
+ XADDL AX, (SP)
+ RET
+
+TEXT runtime·prefetch(SB), 7, $0
+ MOVL 4(SP), AX
+ // PREFETCHNTA (AX)
+ BYTE $0x0f; BYTE $0x18; BYTE $0x00
+ RET
+
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
MOVL $1, AX
RET
+// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// *old = *val;
+// return 0;
+// }
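+//
+// Implemented with LOCK CMPXCHGQ: AX holds *old and CX holds new.
+// On failure CMPXCHGQ leaves the current memory value in AX, which is
+// written back to *old before returning 0.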
+TEXT runtime·cas64(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVQ 16(SP), BP
+ MOVQ 0(BP), AX
+ MOVQ 24(SP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ JNZ cas64_fail
+ MOVL $1, AX
+ RET
+cas64_fail:
+ MOVQ AX, 0(BP)
+ MOVL $0, AX
+ RET
+
// bool casp(void **val, void *old, void *new)
// Atomically:
// if(*val == old){
ADDL CX, AX
RET
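+// uint64 runtime·xadd64(uint64 volatile *val, int64 delta)
+// Atomically:
+// *val += delta;
+// return *val;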
+TEXT runtime·xadd64(SB), 7, $0
+ MOVQ 8(SP), BX  // BX = addr
+ MOVQ 16(SP), AX  // AX = delta
+ MOVQ AX, CX  // save delta
+ LOCK
+ XADDQ AX, 0(BX)  // AX = old value of *addr
+ ADDQ CX, AX  // return old + delta, i.e. the new value
+ RET
+
TEXT runtime·xchg(SB), 7, $0
MOVQ 8(SP), BX
MOVL 16(SP), AX
XCHGL AX, 0(BX)
RET
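+// void runtime·atomicstore64(uint64 volatile *addr, uint64 v)
+// XCHG with a memory operand asserts LOCK implicitly, so the store
+// is also a full memory barrier.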
+TEXT runtime·atomicstore64(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVQ 16(SP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+TEXT runtime·prefetch(SB), 7, $0
+ MOVQ 8(SP), AX
+ // PREFETCHNTA (AX)
+ BYTE $0x0f; BYTE $0x18; BYTE $0x00
+ RET
+
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
{
return *addr;
}
+
+#pragma textflag 7
+uint64
+runtime·xadd64(uint64 volatile* addr, int64 v)
+{
+ uint64 old;
+
+ old = *addr;
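+ // A failed cas64 copies the current value of *addr back into old,
+ // so each retry works with a fresh snapshot.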
+ while(!runtime·cas64(addr, &old, old+v)) {
+ // nothing
+ }
+ return old+v;
+}
return *addr;
}
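+
+// A naturally aligned 64-bit load is already atomic on this port,
+// so a plain dereference suffices.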
+#pragma textflag 7
+uint64
+runtime·atomicload64(uint64 volatile* addr)
+{
+ return *addr;
+}
+
#pragma textflag 7
void*
runtime·atomicloadp(void* volatile* addr)
// license that can be found in the LICENSE file.
#include "runtime.h"
+#include "arch_GOARCH.h"
+
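+// The 64-bit atomic operations on this port are emulated with a small
+// table of locks.  Each entry is padded to a cache line to avoid false
+// sharing, and an address is mapped to a lock by hashing its
+// 8-byte-aligned word index.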
+static union {
+ Lock l;
+ byte pad[CacheLineSize];
+} locktab[57];
+
+#define LOCK(addr) (&locktab[((uintptr)(addr)>>3)%nelem(locktab)].l)
// Atomic add and return new value.
#pragma textflag 7
if(runtime·cas(addr, old, v))
return;
}
-}
\ No newline at end of file
+}
+
+#pragma textflag 7
+bool
+runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new)
+{
+ bool res;
+
+ runtime·lock(LOCK(addr));
+ if(*addr == *old) {
+ *addr = new;
+ res = true;
+ } else {
+ *old = *addr;
+ res = false;
+ }
+ runtime·unlock(LOCK(addr));
+ return res;
+}
+
+#pragma textflag 7
+uint64
+runtime·xadd64(uint64 volatile *addr, int64 delta)
+{
+ uint64 res;
+
+ runtime·lock(LOCK(addr));
+ res = *addr + delta;
+ *addr = res;
+ runtime·unlock(LOCK(addr));
+ return res;
+}
+
+#pragma textflag 7
+uint64
+runtime·atomicload64(uint64 volatile *addr)
+{
+ uint64 res;
+
+ runtime·lock(LOCK(addr));
+ res = *addr;
+ runtime·unlock(LOCK(addr));
+ return res;
+}
+
+#pragma textflag 7
+void
+runtime·atomicstore64(uint64 volatile *addr, uint64 v)
+{
+ runtime·lock(LOCK(addr));
+ *addr = v;
+ runtime·unlock(LOCK(addr));
+}
#include "runtime.h"
#include "stack.h"
+#include "arch_GOARCH.h"
enum {
maxround = sizeof(uintptr),
return n;
}
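+
+// TestAtomic64 sanity-checks the 64-bit atomic operations;
+// runtime·check calls it early during startup.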
+static void
+TestAtomic64(void)
+{
+ uint64 z64, x64;
+
+ z64 = 42;
+ x64 = 0;
+ PREFETCH(&z64);
+ if(runtime·cas64(&z64, &x64, 1))  // must fail: z64 (42) != x64 (0)
+ runtime·throw("cas64 failed");
+ if(x64 != 42)  // a failed cas64 copies *val back into *old
+ runtime·throw("cas64 failed");
+ if(!runtime·cas64(&z64, &x64, 1))  // must succeed now that x64 == z64
+ runtime·throw("cas64 failed");
+ if(x64 != 42 || z64 != 1)
+ runtime·throw("cas64 failed");
+ if(runtime·atomicload64(&z64) != 1)
+ runtime·throw("load64 failed");
+ runtime·atomicstore64(&z64, (1ull<<40)+1);
+ if(runtime·atomicload64(&z64) != (1ull<<40)+1)
+ runtime·throw("store64 failed");
+ if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
+ runtime·throw("xadd64 failed");
+ if(runtime·atomicload64(&z64) != (2ull<<40)+2)
+ runtime·throw("xadd64 failed");
+}
+
void
runtime·check(void)
{
runtime·throw("float32nan2");
if(!(i != i1))
runtime·throw("float32nan3");
+
+ TestAtomic64();
}
void
int32 runtime·write(int32, void*, int32);
int32 runtime·mincore(void*, uintptr, byte*);
bool runtime·cas(uint32*, uint32, uint32);
+bool runtime·cas64(uint64*, uint64*, uint64);
bool runtime·casp(void**, void*, void*);
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.
uint32 runtime·xadd(uint32 volatile*, int32);
+uint64 runtime·xadd64(uint64 volatile*, int64);
uint32 runtime·xchg(uint32 volatile*, uint32);
uint32 runtime·atomicload(uint32 volatile*);
void runtime·atomicstore(uint32 volatile*, uint32);
+void runtime·atomicstore64(uint64 volatile*, uint64);
+uint64 runtime·atomicload64(uint64 volatile*);
void* runtime·atomicloadp(void* volatile*);
void runtime·atomicstorep(void* volatile*, void*);
void runtime·jmpdefer(byte*, void*);