]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: use GetQueuedCompletionStatusEx on windows if available
authorDmitriy Vyukov <dvyukov@google.com>
Thu, 8 Aug 2013 13:41:57 +0000 (17:41 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Thu, 8 Aug 2013 13:41:57 +0000 (17:41 +0400)
GetQueuedCompletionStatusEx allows to dequeue a batch of completion
notifications, which is more efficient than dequeueing one by one.

benchmark                           old ns/op    new ns/op    delta
BenchmarkClientServerParallel4         100605        90945   -9.60%
BenchmarkClientServerParallel4-2        90225        74504  -17.42%

R=golang-dev, alex.brainman
CC=golang-dev
https://golang.org/cl/12436044

src/pkg/runtime/netpoll.goc
src/pkg/runtime/netpoll_windows.c
src/pkg/runtime/os_windows.c
src/pkg/runtime/os_windows.h
src/pkg/runtime/runtime.h

index ec9a31d2dac0c2651328bbd6de377abee4e66fdf..ebe6defa00b9761ea3b597d204b925aed10bc0c3 100644 (file)
@@ -206,6 +206,12 @@ func runtime_pollUnblock(pd *PollDesc) {
                runtime·ready(wg);
 }
 
+uintptr
+runtime·netpollfd(PollDesc *pd)
+{
+       return pd->fd;
+}
+
 // make pd ready, newly runnable goroutines (if any) are enqueued info gpp list
 void
 runtime·netpollready(G **gpp, PollDesc *pd, int32 mode)
index 7a95380a334a06be67a6ced93946d2f3b988641f..b510a41e26f5d77cc37cf9e2755f7260a532cf31 100644 (file)
 
 #pragma dynimport runtime·CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
 #pragma dynimport runtime·GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
+#pragma dynimport runtime·WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
 
 extern void *runtime·CreateIoCompletionPort;
 extern void *runtime·GetQueuedCompletionStatus;
+extern void *runtime·WSAGetOverlappedResult;
 
 #define INVALID_HANDLE_VALUE ((uintptr)-1)
 
@@ -23,12 +25,23 @@ struct net_op
        // used by windows
        Overlapped      o;
        // used by netpoll
-       uintptr runtimeCtx;
+       PollDesc*       pd;
        int32   mode;
        int32   errno;
        uint32  qty;
 };
 
+typedef struct OverlappedEntry OverlappedEntry;
+struct OverlappedEntry
+{
+       uintptr key;
+       net_op* op;  // In reality it's Overlapped*, but we cast it to net_op* anyway.
+       uintptr internal;
+       uint32  qty;
+};
+
+static void handlecompletion(G **gpp, net_op *o, int32 errno, uint32 qty);
+
 static uintptr iocphandle = INVALID_HANDLE_VALUE;  // completion port io handle
 
 void
@@ -64,49 +77,72 @@ runtime·netpollclose(uintptr fd)
 G*
 runtime·netpoll(bool block)
 {
-       uint32 wait, qty, key;
-       int32 mode, errno;
-       net_op *o;
+       OverlappedEntry entries[64];
+       uint32 wait, qty, key, flags, n, i;
+       int32 errno;
+       net_op *op;
        G *gp;
 
        if(iocphandle == INVALID_HANDLE_VALUE)
                return nil;
        gp = nil;
+       wait = 0;
+       if(block)
+               wait = INFINITE;
 retry:
-       o = nil;
-       errno = 0;
-       qty = 0;
-       wait = INFINITE;
-       if(!block)
-               wait = 0;
-       // TODO(brainman): Need a loop here to fetch all pending notifications
-       // (or at least a batch). Scheduler will behave better if is given
-       // a batch of newly runnable goroutines.
-       // TODO(brainman): Call GetQueuedCompletionStatusEx() here when possible.
-       if(runtime·stdcall(runtime·GetQueuedCompletionStatus, 5, iocphandle, &qty, &key, &o, (uintptr)wait) == 0) {
-               errno = runtime·getlasterror();
-               if(o == nil && errno == WAIT_TIMEOUT) {
-                       if(!block)
+       if(runtime·GetQueuedCompletionStatusEx != nil) {
+               n = nelem(entries) / runtime·gomaxprocs;
+               if(n < 8)
+                       n = 8;
+               if(runtime·stdcall(runtime·GetQueuedCompletionStatusEx, 6, iocphandle, entries, (uintptr)n, &n, (uintptr)wait, (uintptr)0) == 0) {
+                       errno = runtime·getlasterror();
+                       if(!block && errno == WAIT_TIMEOUT)
                                return nil;
-                       runtime·throw("netpoll: GetQueuedCompletionStatus timed out");
+                       runtime·printf("netpoll: GetQueuedCompletionStatusEx failed (errno=%d)\n", errno);
+                       runtime·throw("netpoll: GetQueuedCompletionStatusEx failed");
                }
-               if(o == nil) {
-                       runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
-                       runtime·throw("netpoll: GetQueuedCompletionStatus failed");
+               for(i = 0; i < n; i++) {
+                       op = entries[i].op;
+                       errno = 0;
+                       qty = 0;
+                       if(runtime·stdcall(runtime·WSAGetOverlappedResult, 5, runtime·netpollfd(op->pd), op, &qty, (uintptr)0, (uintptr)&flags) == 0)
+                               errno = runtime·getlasterror();
+                       handlecompletion(&gp, op, errno, qty);
                }
-               // dequeued failed IO packet, so report that
+       } else {
+               op = nil;
+               errno = 0;
+               qty = 0;
+               if(runtime·stdcall(runtime·GetQueuedCompletionStatus, 5, iocphandle, &qty, &key, &op, (uintptr)wait) == 0) {
+                       errno = runtime·getlasterror();
+                       if(!block && errno == WAIT_TIMEOUT)
+                               return nil;
+                       if(op == nil) {
+                               runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
+                               runtime·throw("netpoll: GetQueuedCompletionStatus failed");
+                       }
+                       // dequeued failed IO packet, so report that
+               }
+               handlecompletion(&gp, op, errno, qty);
        }
-       if(o == nil)
-               runtime·throw("netpoll: GetQueuedCompletionStatus returned o == nil");
-       mode = o->mode;
+       if(block && gp == nil)
+               goto retry;
+       return gp;
+}
+
+static void
+handlecompletion(G **gpp, net_op *op, int32 errno, uint32 qty)
+{
+       int32 mode;
+
+       if(op == nil)
+               runtime·throw("netpoll: GetQueuedCompletionStatus returned op == nil");
+       mode = op->mode;
        if(mode != 'r' && mode != 'w') {
                runtime·printf("netpoll: GetQueuedCompletionStatus returned invalid mode=%d\n", mode);
                runtime·throw("netpoll: GetQueuedCompletionStatus returned invalid mode");
        }
-       o->errno = errno;
-       o->qty = qty;
-       runtime·netpollready(&gp, (void*)o->runtimeCtx, mode);
-       if(block && gp == nil)
-               goto retry;
-       return gp;
+       op->errno = errno;
+       op->qty = qty;
+       runtime·netpollready(gpp, op->pd, mode);
 }
index 0c3b4d961e456e2432ee88f713847fa001986656..cefa846c4b3c479d8f7194ba89e1e38592821dfe 100644 (file)
@@ -68,6 +68,8 @@ extern void *runtime·timeBeginPeriod;
 extern void *runtime·WaitForSingleObject;
 extern void *runtime·WriteFile;
 
+void *runtime·GetQueuedCompletionStatusEx;
+
 static int32
 getproccount(void)
 {
@@ -100,6 +102,7 @@ runtime·osinit(void)
                SetProcessPriorityBoost = runtime·stdcall(runtime·GetProcAddress, 2, kernel32, "SetProcessPriorityBoost");
                if(SetProcessPriorityBoost != nil)  // supported since Windows XP
                        runtime·stdcall(SetProcessPriorityBoost, 2, (uintptr)-1, (uintptr)1);
+               runtime·GetQueuedCompletionStatusEx = runtime·stdcall(runtime·GetProcAddress, 2, kernel32, "GetQueuedCompletionStatusEx");
        }
 }
 
index cf0ecb68eea00a55ba5bcb8969cf7de9812eead8..b64fa88736f0a2451925ab6630865c145e67a74b 100644 (file)
@@ -4,6 +4,7 @@
 
 extern void *runtime·LoadLibrary;
 extern void *runtime·GetProcAddress;
+extern void *runtime·GetQueuedCompletionStatusEx;
 
 // Call a Windows function with stdcall conventions,
 // and switch to os stack during the call.
index 55ae16e27627e24010405872400ebfb238615efb..2529a0fdcf45ed0e7347d09732120ddac618e99c 100644 (file)
@@ -853,6 +853,7 @@ void        runtime·netpollinit(void);
 int32  runtime·netpollopen(uintptr, PollDesc*);
 int32   runtime·netpollclose(uintptr);
 void   runtime·netpollready(G**, PollDesc*, int32);
+uintptr        runtime·netpollfd(PollDesc*);
 void   runtime·crash(void);
 void   runtime·parsedebugvars(void);
 void   _rt0_go(void);