runtime: add 64-bit atomics
author    Dmitriy Vyukov <dvyukov@google.com>
          Thu, 5 Apr 2012 14:47:43 +0000 (18:47 +0400)
committer Dmitriy Vyukov <dvyukov@google.com>
          Thu, 5 Apr 2012 14:47:43 +0000 (18:47 +0400)
This is a factored-out part of:
https://golang.org/cl/5279048/
(Parallel GC)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5985047

src/pkg/runtime/arch_386.h
src/pkg/runtime/arch_amd64.h
src/pkg/runtime/arch_arm.h
src/pkg/runtime/asm_386.s
src/pkg/runtime/asm_amd64.s
src/pkg/runtime/atomic_386.c
src/pkg/runtime/atomic_amd64.c
src/pkg/runtime/atomic_arm.c
src/pkg/runtime/runtime.c
src/pkg/runtime/runtime.h

src/pkg/runtime/arch_386.h
index a0798f99e926e1e0603edfea8794b40578e8d7a7..68931aed3fa57d39bc55f2cf0aa3d79bbcdb1ba8 100644 (file)
@@ -2,3 +2,7 @@ enum {
        thechar = '8',
        CacheLineSize = 64
 };
+
+// prefetches *addr into the processor's cache
+#define PREFETCH(addr) runtime·prefetch(addr)
+void   runtime·prefetch(void*);
src/pkg/runtime/arch_amd64.h
index dd1cfc18d1684793f8e7a0b1f1f5acab50298c0c..d2800fc17dc2d608fe917af8284bba8bb99c95eb 100644 (file)
@@ -2,3 +2,7 @@ enum {
        thechar = '6',
        CacheLineSize = 64
 };
+
+// prefetches *addr into the processor's cache
+#define PREFETCH(addr) runtime·prefetch(addr)
+void   runtime·prefetch(void*);
src/pkg/runtime/arch_arm.h
index c1a7a0f37935a7fe7f93d13f4d636bb5c2f596f7..d4ab74d585fba4eeca8ef9fdc3c3b55be9e13504 100644 (file)
@@ -2,3 +2,5 @@ enum {
        thechar = '5',
        CacheLineSize = 32
 };
+
+#define PREFETCH(addr) USED(addr)
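Each arch header now supplies a PREFETCH macro: on 386 and amd64 it expands to runtime·prefetch, which issues a real PREFETCHNTA (hand-encoded in the assembly below), while on ARM it is a USED(addr) no-op. For comparison, GCC and Clang expose the same hint as a builtin; a hypothetical portable analogue, not part of this change:

/* __builtin_prefetch(addr, rw, locality): rw=0 marks a read and
   locality=0 means "no temporal locality", which x86 compilers
   lower to PREFETCHNTA -- the instruction the runtime encodes
   by hand below. */
#define PREFETCH(addr) __builtin_prefetch((addr), 0, 0)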
src/pkg/runtime/asm_386.s
index 21bd293ab0fc20a958ba7a3f92c99351fce4b14c..124fd2766b916b68bb868fa798145259be3ef05d 100644 (file)
@@ -299,6 +299,33 @@ TEXT runtime·cas(SB), 7, $0
        MOVL    $1, AX
        RET
 
+// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
+// Atomically:
+//     if(*val == *old){
+//             *val = new;
+//             return 1;
+//     } else {
+//             *old = *val;
+//             return 0;
+//     }
+TEXT runtime·cas64(SB), 7, $0
+       MOVL    4(SP), BP
+       MOVL    8(SP), SI
+       MOVL    0(SI), AX
+       MOVL    4(SI), DX
+       MOVL    12(SP), BX
+       MOVL    16(SP), CX
+       LOCK
+       CMPXCHG8B       0(BP)
+       JNZ     cas64_fail
+       MOVL    $1, AX
+       RET
+cas64_fail:
+       MOVL    AX, 0(SI)
+       MOVL    DX, 4(SI)
+       XORL    AX, AX
+       RET
+
 // bool casp(void **p, void *old, void *new)
 // Atomically:
 //     if(*p == old){
@@ -357,6 +384,43 @@ TEXT runtime·atomicstore(SB), 7, $0
        XCHGL   AX, 0(BX)
        RET
 
+// uint64 atomicload64(uint64 volatile* addr);
+// (the C compiler returns a uint64 via a hidden result pointer, so in effect:)
+// void atomicload64(uint64 *res, uint64 volatile *addr);
+TEXT runtime·atomicload64(SB), 7, $0
+       MOVL    4(SP), BX
+       MOVL    8(SP), AX
+       // MOVQ (%EAX), %MM0
+       BYTE $0x0f; BYTE $0x6f; BYTE $0x00
+       // MOVQ %MM0, 0(%EBX)
+       BYTE $0x0f; BYTE $0x7f; BYTE $0x03
+       // EMMS
+       BYTE $0x0F; BYTE $0x77
+       RET
+
+// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
+TEXT runtime·atomicstore64(SB), 7, $0
+       MOVL    4(SP), AX
+       // MOVQ and EMMS were introduced on the Pentium MMX.
+       // MOVQ 0x8(%ESP), %MM0
+       BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
+       // MOVQ %MM0, (%EAX)
+       BYTE $0x0f; BYTE $0x7f; BYTE $0x00 
+       // EMMS
+       BYTE $0x0F; BYTE $0x77
+       // This is essentially a no-op, but it provides required memory fencing.
+       // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
+       MOVL    $0, AX
+       LOCK
+       XADDL   AX, (SP)
+       RET
+
+TEXT runtime·prefetch(SB), 7, $0
+       MOVL    4(SP), AX
+       // PREFETCHNTA (AX)
+       BYTE $0x0f; BYTE $0x18; BYTE $0x00
+       RET
+
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller
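Three 386-specific points above are easy to miss. cas64 relies on CMPXCHG8B, which compares EDX:EAX against the 8-byte memory operand and stores ECX:EBX on a match; on a mismatch the instruction loads the observed value into EDX:EAX, which the cas64_fail path writes back through *old. The 64-bit load and store are built from MMX MOVQ, hand-encoded as BYTE sequences because the assembler lacked the mnemonic; an aligned 8-byte MOVQ is atomic and has existed since the Pentium MMX, long before SSE2. Finally, atomicstore64 ends with a LOCK XADDL of zero into (SP): a locked read-modify-write that changes no data but serializes memory, standing in for MFENCE on pre-SSE2 processors. A user-space sketch of that fence idiom in GCC inline assembly (hypothetical, for illustration only):

#include <stdint.h>

/* Any LOCK-prefixed read-modify-write is a full memory barrier on x86.
   Adding zero to a dummy variable leaves memory unchanged, mirroring
   the runtime's "LOCK XADDL AX, (SP)" trick for CPUs without MFENCE. */
static inline void
full_fence(void)
{
	uint32_t dummy = 0, zero = 0;

	__asm__ __volatile__("lock; xaddl %1, %0"
	                     : "+m"(dummy), "+r"(zero)
	                     : : "memory", "cc");
}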
src/pkg/runtime/asm_amd64.s
index d41ab96d022f60025a28f4cfc28ff355693f7745..7a5dd830b83a8ec22d9674f44438ec9676460bf3 100644 (file)
@@ -344,6 +344,30 @@ TEXT runtime·cas(SB), 7, $0
        MOVL    $1, AX
        RET
 
+// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
+// Atomically:
+//     if(*val == *old){
+//             *val = new;
+//             return 1;
+//     } else {
+//             *old = *val;
+//             return 0;
+//     }
+TEXT runtime·cas64(SB), 7, $0
+       MOVQ    8(SP), BX
+       MOVQ    16(SP), BP
+       MOVQ    0(BP), AX
+       MOVQ    24(SP), CX
+       LOCK
+       CMPXCHGQ        CX, 0(BX)
+       JNZ     cas64_fail
+       MOVL    $1, AX
+       RET
+cas64_fail:
+       MOVQ    AX, 0(BP)
+       MOVL    $0, AX
+       RET
+
 // bool casp(void **val, void *old, void *new)
 // Atomically:
 //     if(*val == old){
@@ -376,6 +400,15 @@ TEXT runtime·xadd(SB), 7, $0
        ADDL    CX, AX
        RET
 
+TEXT runtime·xadd64(SB), 7, $0
+       MOVQ    8(SP), BX
+       MOVQ    16(SP), AX
+       MOVQ    AX, CX
+       LOCK
+       XADDQ   AX, 0(BX)
+       ADDQ    CX, AX
+       RET
+
 TEXT runtime·xchg(SB), 7, $0
        MOVQ    8(SP), BX
        MOVL    16(SP), AX
@@ -402,6 +435,18 @@ TEXT runtime·atomicstore(SB), 7, $0
        XCHGL   AX, 0(BX)
        RET
 
+TEXT runtime·atomicstore64(SB), 7, $0
+       MOVQ    8(SP), BX
+       MOVQ    16(SP), AX
+       XCHGQ   AX, 0(BX)
+       RET
+
+TEXT runtime·prefetch(SB), 7, $0
+       MOVQ    8(SP), AX
+       // PREFETCHNTA (AX)
+       BYTE $0x0f; BYTE $0x18; BYTE $0x00
+       RET
+
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller
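On amd64 the same primitives collapse into single locked instructions: CMPXCHGQ for cas64, XADDQ for xadd64, and XCHGQ for atomicstore64 (XCHG with a memory operand is implicitly locked, so the store doubles as a full barrier). Note the trailing ADDQ CX, AX in xadd64: the hardware operation is fetch-and-add, returning the old value, while runtime·xadd64, as the runtime.h comment notes, is add-and-fetch. A C11 sketch of that adjustment (hypothetical wrapper, not runtime code):

#include <stdatomic.h>
#include <stdint.h>

/* atomic_fetch_add returns the value *before* the addition, exactly
   like XADDQ; adding the delta back yields the new value, matching
   the "ADDQ CX, AX" above. */
static inline uint64_t
xadd64(_Atomic uint64_t *addr, int64_t delta)
{
	return atomic_fetch_add(addr, (uint64_t)delta) + (uint64_t)delta;
}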
src/pkg/runtime/atomic_386.c
index a4f2a114fc285c82eca2d7ddb02009a927e5f845..79b7cbf96dcc20b7a847d31524a1cb7339171b46 100644 (file)
@@ -17,3 +17,16 @@ runtime·atomicloadp(void* volatile* addr)
 {
        return *addr;
 }
+
+#pragma textflag 7
+uint64
+runtime·xadd64(uint64 volatile* addr, int64 v)
+{
+       uint64 old;
+
+       old = *addr;
+       while(!runtime·cas64(addr, &old, old+v)) {
+               // nothing
+       }
+       return old+v;
+}
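The xadd64 loop above leans on cas64's failure contract: when the compare fails, cas64 stores the value it actually observed into *old, so each retry starts from fresh data without an explicit reload of *addr. C11's compare-exchange makes the same promise; a user-space rendition of the loop (hypothetical, for illustration):

#include <stdatomic.h>
#include <stdint.h>

/* Same shape as the 386 xadd64 above: on failure,
   atomic_compare_exchange_strong refreshes 'old' with the observed
   value, so the loop needs no explicit reload of *addr. */
static uint64_t
xadd64_via_cas(_Atomic uint64_t *addr, int64_t v)
{
	uint64_t old = atomic_load(addr);

	while (!atomic_compare_exchange_strong(addr, &old, old + (uint64_t)v))
		;	/* 'old' was refreshed; try again */
	return old + (uint64_t)v;
}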
src/pkg/runtime/atomic_amd64.c
index a4f2a114fc285c82eca2d7ddb02009a927e5f845..e92d8ec212c682369a0259e9b3a39766e066278a 100644 (file)
@@ -11,6 +11,13 @@ runtime·atomicload(uint32 volatile* addr)
        return *addr;
 }
 
+#pragma textflag 7
+uint64
+runtime·atomicload64(uint64 volatile* addr)
+{
+       return *addr;
+}
+
 #pragma textflag 7
 void*
 runtime·atomicloadp(void* volatile* addr)
src/pkg/runtime/atomic_arm.c
index 52e4059ae210fb36bd56784ec24b650c7c97d5a8..0b54840cc9fab743552020fe0cc2f04b713eb971 100644 (file)
@@ -3,6 +3,14 @@
 // license that can be found in the LICENSE file.
 
 #include "runtime.h"
+#include "arch_GOARCH.h"
+
+static union {
+       Lock l;
+       byte pad[CacheLineSize];
+} locktab[57];
+
+#define LOCK(addr) (&locktab[((uintptr)(addr)>>3)%nelem(locktab)].l)
 
 // Atomic add and return new value.
 #pragma textflag 7
@@ -80,4 +88,56 @@ runtime·atomicstore(uint32 volatile* addr, uint32 v)
                if(runtime·cas(addr, old, v))
                        return;
        }
-}
\ No newline at end of file
+}
+
+#pragma textflag 7
+bool
+runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new)
+{
+       bool res;
+       
+       runtime·lock(LOCK(addr));
+       if(*addr == *old) {
+               *addr = new;
+               res = true;
+       } else {
+               *old = *addr;
+               res = false;
+       }
+       runtime·unlock(LOCK(addr));
+       return res;
+}
+
+#pragma textflag 7
+uint64
+runtime·xadd64(uint64 volatile *addr, int64 delta)
+{
+       uint64 res;
+       
+       runtime·lock(LOCK(addr));
+       res = *addr + delta;
+       *addr = res;
+       runtime·unlock(LOCK(addr));
+       return res;
+}
+
+#pragma textflag 7
+uint64
+runtime·atomicload64(uint64 volatile *addr)
+{
+       uint64 res;
+       
+       runtime·lock(LOCK(addr));
+       res = *addr;
+       runtime·unlock(LOCK(addr));
+       return res;
+}
+
+#pragma textflag 7
+void
+runtime·atomicstore64(uint64 volatile *addr, uint64 v)
+{
+       runtime·lock(LOCK(addr));
+       *addr = v;
+       runtime·unlock(LOCK(addr));
+}
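ARM has no 64-bit atomic instructions to lean on here, so all four operations are emulated under a lock. The locktab declaration packs three ideas into a few lines: a modest table of locks striped by address (57 is deliberately odd and not a power of two, so regularly strided addresses spread across buckets), each entry padded to CacheLineSize so no two locks share a cache line (avoiding false sharing between unrelated addresses), and a hash that discards the low three bits, which are always zero for 8-byte-aligned uint64 values. A self-contained sketch of the same striped-lock scheme, with a C11 spinlock standing in for the runtime's Lock (hypothetical, user-space):

#include <stdatomic.h>
#include <stdint.h>

#define CACHELINE 64
#define NBUCKETS  57	/* odd, non-power-of-two: strided addresses spread out */

/* One spinlock per bucket, padded so no two share a cache line. */
static union {
	_Atomic int l;
	char pad[CACHELINE];
} locktab[NBUCKETS];	/* zero-initialized: every lock starts free */

static _Atomic int *
bucket(volatile void *addr)
{
	/* 8-byte-aligned data: the low 3 bits carry no information. */
	return &locktab[((uintptr_t)addr >> 3) % NBUCKETS].l;
}

static uint64_t
emulated_load64(volatile uint64_t *addr)
{
	_Atomic int *l = bucket(addr);
	uint64_t v;

	while (atomic_exchange_explicit(l, 1, memory_order_acquire))
		;	/* spin until the lock is ours */
	v = *addr;
	atomic_store_explicit(l, 0, memory_order_release);
	return v;
}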
src/pkg/runtime/runtime.c
index ebb5544fbae380316f286b0d9f62b7eab6834e7d..2cb3501dd17aa8e6fc0808bc20f08299f2767830 100644 (file)
@@ -4,6 +4,7 @@
 
 #include "runtime.h"
 #include "stack.h"
+#include "arch_GOARCH.h"
 
 enum {
        maxround = sizeof(uintptr),
@@ -267,6 +268,33 @@ runtime·atoi(byte *p)
        return n;
 }
 
+static void
+TestAtomic64(void)
+{
+       uint64 z64, x64;
+
+       z64 = 42;
+       x64 = 0;
+       PREFETCH(&z64);
+       if(runtime·cas64(&z64, &x64, 1))
+               runtime·throw("cas64 failed");
+       if(x64 != 42)
+               runtime·throw("cas64 failed");
+       if(!runtime·cas64(&z64, &x64, 1))
+               runtime·throw("cas64 failed");
+       if(x64 != 42 || z64 != 1)
+               runtime·throw("cas64 failed");
+       if(runtime·atomicload64(&z64) != 1)
+               runtime·throw("load64 failed");
+       runtime·atomicstore64(&z64, (1ull<<40)+1);
+       if(runtime·atomicload64(&z64) != (1ull<<40)+1)
+               runtime·throw("store64 failed");
+       if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
+               runtime·throw("xadd64 failed");
+       if(runtime·atomicload64(&z64) != (2ull<<40)+2)
+               runtime·throw("xadd64 failed");
+}
+
 void
 runtime·check(void)
 {
@@ -342,6 +370,8 @@ runtime·check(void)
                runtime·throw("float32nan2");
        if(!(i != i1))
                runtime·throw("float32nan3");
+
+       TestAtomic64();
 }
 
 void
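TestAtomic64 is wired into runtime·check, so every program exercises the new primitives once at startup on the actual host CPU. The choreography of the first two checks matters: z64 starts at 42 while the expected value x64 is 0, so the first cas64 must fail and publish the observed 42 into x64; only then can the second attempt succeed. The (1ull<<40) constants push values past 32 bits to catch an implementation that moves only half the word. The same sequence against C11 atomics (hypothetical user-space harness, not runtime code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

int
main(void)
{
	_Atomic uint64_t z64 = 42;
	uint64_t x64 = 0;

	/* Mismatch (42 != 0): must fail and write the observed 42 back. */
	assert(!atomic_compare_exchange_strong(&z64, &x64, 1));
	assert(x64 == 42);
	/* Expected value now matches: must succeed. */
	assert(atomic_compare_exchange_strong(&z64, &x64, 1));
	assert(x64 == 42 && z64 == 1);

	/* Values above 2^32 catch a half-width implementation. */
	atomic_store(&z64, (1ULL << 40) + 1);
	assert(atomic_load(&z64) == (1ULL << 40) + 1);
	assert(atomic_fetch_add(&z64, (1ULL << 40) + 1) + ((1ULL << 40) + 1)
	       == (2ULL << 40) + 2);
	assert(atomic_load(&z64) == (2ULL << 40) + 2);
	return 0;
}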
src/pkg/runtime/runtime.h
index 6f5aea11dbf8faa691633c12c9a2d64c22d0f778..177de6c05fe3e3431648aa6b11b84c60a8bc92ec 100644 (file)
@@ -512,13 +512,17 @@ void      runtime·tracebackothers(G*);
 int32  runtime·write(int32, void*, int32);
 int32  runtime·mincore(void*, uintptr, byte*);
 bool   runtime·cas(uint32*, uint32, uint32);
+bool   runtime·cas64(uint64*, uint64*, uint64);
 bool   runtime·casp(void**, void*, void*);
 // Don't confuse with XADD x86 instruction,
 // this one is actually 'addx', that is, add-and-fetch.
 uint32 runtime·xadd(uint32 volatile*, int32);
+uint64 runtime·xadd64(uint64 volatile*, int64);
 uint32 runtime·xchg(uint32 volatile*, uint32);
 uint32 runtime·atomicload(uint32 volatile*);
 void   runtime·atomicstore(uint32 volatile*, uint32);
+void   runtime·atomicstore64(uint64 volatile*, uint64);
+uint64 runtime·atomicload64(uint64 volatile*);
 void*  runtime·atomicloadp(void* volatile*);
 void   runtime·atomicstorep(void* volatile*, void*);
 void   runtime·jmpdefer(byte*, void*);