vs tip:
BenchmarkTCP4OneShot 172994 40485 -76.60%
BenchmarkTCP4OneShot-2 96581 30028 -68.91%
BenchmarkTCP4OneShot-4 52615 18454 -64.93%
BenchmarkTCP4OneShot-8 26351 12289 -53.36%
BenchmarkTCP4OneShot-16 12258 16093 +31.29%
BenchmarkTCP4OneShot-32 13200 17045 +29.13%
BenchmarkTCP4OneShotTimeout 124814 42932 -65.60%
BenchmarkTCP4OneShotTimeout-2 99090 29040 -70.69%
BenchmarkTCP4OneShotTimeout-4 51860 18455 -64.41%
BenchmarkTCP4OneShotTimeout-8 26100 12073 -53.74%
BenchmarkTCP4OneShotTimeout-16 12198 16654 +36.53%
BenchmarkTCP4OneShotTimeout-32 13438 17143 +27.57%
BenchmarkTCP4Persistent 115647 7782 -93.27%
BenchmarkTCP4Persistent-2 58024 4808 -91.71%
BenchmarkTCP4Persistent-4 24715 3674 -85.13%
BenchmarkTCP4Persistent-8 16431 2407 -85.35%
BenchmarkTCP4Persistent-16 2336 1875 -19.73%
BenchmarkTCP4Persistent-32 1689 1637 -3.08%
BenchmarkTCP4PersistentTimeout 79754 7859 -90.15%
BenchmarkTCP4PersistentTimeout-2 57708 5952 -89.69%
BenchmarkTCP4PersistentTimeout-4 26907 3823 -85.79%
BenchmarkTCP4PersistentTimeout-8 15036 2567 -82.93%
BenchmarkTCP4PersistentTimeout-16 2507 1903 -24.09%
BenchmarkTCP4PersistentTimeout-32 1717 1627 -5.24%
vs old scheduler:
benchmark old ns/op new ns/op delta
BenchmarkTCPOneShot 192244 40485 -78.94%
BenchmarkTCPOneShot-2 63835 30028 -52.96%
BenchmarkTCPOneShot-4 35443 18454 -47.93%
BenchmarkTCPOneShot-8 22140 12289 -44.49%
BenchmarkTCPOneShot-16 16930 16093 -4.94%
BenchmarkTCPOneShot-32 16719 17045 +1.95%
BenchmarkTCPOneShotTimeout 190495 42932 -77.46%
BenchmarkTCPOneShotTimeout-2 64828 29040 -55.20%
BenchmarkTCPOneShotTimeout-4 34591 18455 -46.65%
BenchmarkTCPOneShotTimeout-8 21989 12073 -45.10%
BenchmarkTCPOneShotTimeout-16 16848 16654 -1.15%
BenchmarkTCPOneShotTimeout-32 16796 17143 +2.07%
BenchmarkTCPPersistent 81670 7782 -90.47%
BenchmarkTCPPersistent-2 26598 4808 -81.92%
BenchmarkTCPPersistent-4 15633 3674 -76.50%
BenchmarkTCPPersistent-8 18093 2407 -86.70%
BenchmarkTCPPersistent-16 17472 1875 -89.27%
BenchmarkTCPPersistent-32 7679 1637 -78.68%
BenchmarkTCPPersistentTimeout 83186 7859 -90.55%
BenchmarkTCPPersistentTimeout-2 26883 5952 -77.86%
BenchmarkTCPPersistentTimeout-4 15776 3823 -75.77%
BenchmarkTCPPersistentTimeout-8 18180 2567 -85.88%
BenchmarkTCPPersistentTimeout-16 17454 1903 -89.10%
BenchmarkTCPPersistentTimeout-32 7798 1627 -79.14%
R=golang-dev, iant, bradfitz, dave, rsc
CC=golang-dev
https://golang.org/cl/
7579044
+++ /dev/null
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Waiting for FDs via epoll(7).
-
-package net
-
-import (
- "os"
- "syscall"
-)
-
-const (
- readFlags = syscall.EPOLLIN | syscall.EPOLLRDHUP
- writeFlags = syscall.EPOLLOUT
-)
-
-type pollster struct {
- epfd int
-
- // Events we're already waiting for
- // Must hold pollServer lock
- events map[int]uint32
-
- // An event buffer for EpollWait.
- // Used without a lock, may only be used by WaitFD.
- waitEventBuf [10]syscall.EpollEvent
- waitEvents []syscall.EpollEvent
-
- // An event buffer for EpollCtl, to avoid a malloc.
- // Must hold pollServer lock.
- ctlEvent syscall.EpollEvent
-}
-
-func newpollster() (p *pollster, err error) {
- p = new(pollster)
- if p.epfd, err = syscall.EpollCreate1(syscall.EPOLL_CLOEXEC); err != nil {
- if err != syscall.ENOSYS {
- return nil, os.NewSyscallError("epoll_create1", err)
- }
- // The arg to epoll_create is a hint to the kernel
- // about the number of FDs we will care about.
- // We don't know, and since 2.6.8 the kernel ignores it anyhow.
- if p.epfd, err = syscall.EpollCreate(16); err != nil {
- return nil, os.NewSyscallError("epoll_create", err)
- }
- syscall.CloseOnExec(p.epfd)
- }
- p.events = make(map[int]uint32)
- return p, nil
-}
-
-// First return value is whether the pollServer should be woken up.
-// This version always returns false.
-func (p *pollster) AddFD(fd int, mode int, repeat bool) (bool, error) {
- // pollServer is locked.
-
- var already bool
- p.ctlEvent.Fd = int32(fd)
- p.ctlEvent.Events, already = p.events[fd]
- if !repeat {
- p.ctlEvent.Events |= syscall.EPOLLONESHOT
- }
- if mode == 'r' {
- p.ctlEvent.Events |= readFlags
- } else {
- p.ctlEvent.Events |= writeFlags
- }
-
- var op int
- if already {
- op = syscall.EPOLL_CTL_MOD
- } else {
- op = syscall.EPOLL_CTL_ADD
- }
- if err := syscall.EpollCtl(p.epfd, op, fd, &p.ctlEvent); err != nil {
- return false, os.NewSyscallError("epoll_ctl", err)
- }
- p.events[fd] = p.ctlEvent.Events
- return false, nil
-}
-
-func (p *pollster) StopWaiting(fd int, bits uint) {
- // pollServer is locked.
-
- events, already := p.events[fd]
- if !already {
- // The fd returned by the kernel may have been
- // cancelled already; return silently.
- return
- }
-
- // If syscall.EPOLLONESHOT is not set, the wait
- // is a repeating wait, so don't change it.
- if events&syscall.EPOLLONESHOT == 0 {
- return
- }
-
- // Disable the given bits.
- // If we're still waiting for other events, modify the fd
- // event in the kernel. Otherwise, delete it.
- events &= ^uint32(bits)
- if int32(events)&^syscall.EPOLLONESHOT != 0 {
- p.ctlEvent.Fd = int32(fd)
- p.ctlEvent.Events = events
- if err := syscall.EpollCtl(p.epfd, syscall.EPOLL_CTL_MOD, fd, &p.ctlEvent); err != nil {
- print("Epoll modify fd=", fd, ": ", err.Error(), "\n")
- }
- p.events[fd] = events
- } else {
- if err := syscall.EpollCtl(p.epfd, syscall.EPOLL_CTL_DEL, fd, nil); err != nil {
- print("Epoll delete fd=", fd, ": ", err.Error(), "\n")
- }
- delete(p.events, fd)
- }
-}
-
-// Return value is whether the pollServer should be woken up.
-// This version always returns false.
-func (p *pollster) DelFD(fd int, mode int) bool {
- // pollServer is locked.
-
- if mode == 'r' {
- p.StopWaiting(fd, readFlags)
- } else {
- p.StopWaiting(fd, writeFlags)
- }
-
- // Discard any queued up events.
- i := 0
- for i < len(p.waitEvents) {
- if fd == int(p.waitEvents[i].Fd) {
- copy(p.waitEvents[i:], p.waitEvents[i+1:])
- p.waitEvents = p.waitEvents[:len(p.waitEvents)-1]
- } else {
- i++
- }
- }
- return false
-}
-
-func (p *pollster) WaitFD(s *pollServer, nsec int64) (fd int, mode int, err error) {
- for len(p.waitEvents) == 0 {
- var msec int = -1
- if nsec > 0 {
- msec = int((nsec + 1e6 - 1) / 1e6)
- }
-
- s.Unlock()
- n, err := syscall.EpollWait(p.epfd, p.waitEventBuf[0:], msec)
- s.Lock()
-
- if err != nil {
- if err == syscall.EAGAIN || err == syscall.EINTR {
- continue
- }
- return -1, 0, os.NewSyscallError("epoll_wait", err)
- }
- if n == 0 {
- return -1, 0, nil
- }
- p.waitEvents = p.waitEventBuf[0:n]
- }
-
- ev := &p.waitEvents[0]
- p.waitEvents = p.waitEvents[1:]
-
- fd = int(ev.Fd)
-
- if ev.Events&writeFlags != 0 {
- p.StopWaiting(fd, writeFlags)
- return fd, 'w', nil
- }
- if ev.Events&readFlags != 0 {
- p.StopWaiting(fd, readFlags)
- return fd, 'r', nil
- }
-
- // Other events are error conditions - wake whoever is waiting.
- events, _ := p.events[fd]
- if events&writeFlags != 0 {
- p.StopWaiting(fd, writeFlags)
- return fd, 'w', nil
- }
- p.StopWaiting(fd, readFlags)
- return fd, 'r', nil
-}
-
-func (p *pollster) Close() error {
- return os.NewSyscallError("close", syscall.Close(p.epfd))
-}
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin
+// +build darwin linux,386 linux,amd64
package net
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build freebsd linux netbsd openbsd
+// +build freebsd linux,arm netbsd openbsd
package net
/*
* Input to cgo -cdefs
-GOARCH=386 cgo -cdefs defs2.go >386/defs.h
+GOARCH=386 go tool cgo -cdefs defs2_linux.go >defs_linux_386.h
The asm header tricks we have to use for Linux on amd64
(see defs.c and defs1.c) don't work here, so this is yet another
package runtime
/*
-#cgo CFLAGS: -I/home/rsc/pub/linux-2.6/arch/x86/include -I/home/rsc/pub/linux-2.6/include -D_LOOSE_KERNEL_NAMES -D__ARCH_SI_UID_T=__kernel_uid32_t
+#cgo CFLAGS: -I/tmp/linux/arch/x86/include -I/tmp/linux/include -D_LOOSE_KERNEL_NAMES -D__ARCH_SI_UID_T=__kernel_uid32_t
#define size_t __kernel_size_t
+#define pid_t int
#include <asm/signal.h>
#include <asm/mman.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/siginfo.h>
+#include <asm-generic/errno.h>
#include <asm-generic/fcntl.h>
+#include <asm-generic/poll.h>
+#include <linux/eventpoll.h>
// This is the sigaction structure from the Linux 2.1.68 kernel which
// is used with the rt_sigaction system call. For 386 this is not
__sighandler_t k_sa_handler;
unsigned long sa_flags;
void (*sa_restorer) (void);
- sigset_t sa_mask;
+ unsigned long long sa_mask;
};
*/
import "C"
const (
+ EINTR = C.EINTR
+ EAGAIN = C.EAGAIN
+ ENOMEM = C.ENOMEM
+
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
PROT_WRITE = C.PROT_WRITE
O_RDONLY = C.O_RDONLY
O_CLOEXEC = C.O_CLOEXEC
+
+ EPOLLIN = C.POLLIN
+ EPOLLOUT = C.POLLOUT
+ EPOLLERR = C.POLLERR
+ EPOLLHUP = C.POLLHUP
+ EPOLLRDHUP = C.POLLRDHUP
+ EPOLLET = C.EPOLLET
+ EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
+ EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
+ EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
+ EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
)
type Fpreg C.struct__fpreg
type Sigcontext C.struct_sigcontext
type Ucontext C.struct_ucontext
type Itimerval C.struct_itimerval
+type EpollEvent C.struct_epoll_event
/*
Input to cgo -cdefs
-GOARCH=amd64 cgo -cdefs defs.go defs1.go >amd64/defs.h
+GOARCH=amd64 go tool cgo -cdefs defs_linux.go defs1_linux.go >defs_linux_amd64.h
*/
package runtime
#include <asm/signal.h>
#include <asm/siginfo.h>
#include <asm/mman.h>
+#include <asm-generic/errno.h>
+#include <asm-generic/poll.h>
+#include <linux/eventpoll.h>
*/
import "C"
const (
+ EINTR = C.EINTR
+ EAGAIN = C.EAGAIN
+ ENOMEM = C.ENOMEM
+
PROT_NONE = C.PROT_NONE
PROT_READ = C.PROT_READ
PROT_WRITE = C.PROT_WRITE
ITIMER_REAL = C.ITIMER_REAL
ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
ITIMER_PROF = C.ITIMER_PROF
+
+ EPOLLIN = C.POLLIN
+ EPOLLOUT = C.POLLOUT
+ EPOLLERR = C.POLLERR
+ EPOLLHUP = C.POLLHUP
+ EPOLLRDHUP = C.POLLRDHUP
+ EPOLLET = C.EPOLLET
+ EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
+ EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
+ EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
+ EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
)
type Timespec C.struct_timespec
type Sigaction C.struct_sigaction
type Siginfo C.siginfo_t
type Itimerval C.struct_itimerval
+type EpollEvent C.struct_epoll_event
// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs2.go
+// cgo -cdefs defs2_linux.go
enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
PROT_NONE = 0x0,
PROT_READ = 0x1,
PROT_WRITE = 0x2,
O_RDONLY = 0x0,
O_CLOEXEC = 0x80000,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
};
typedef struct Fpreg Fpreg;
typedef struct Sigcontext Sigcontext;
typedef struct Ucontext Ucontext;
typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
#pragma pack on
Timeval it_interval;
Timeval it_value;
};
+struct EpollEvent {
+ uint32 events;
+ uint64 data;
+};
#pragma pack off
// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs.go defs1.go
+// cgo -cdefs defs_linux.go defs1_linux.go
enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
PROT_NONE = 0x0,
PROT_READ = 0x1,
PROT_WRITE = 0x2,
ITIMER_REAL = 0x0,
ITIMER_VIRTUAL = 0x1,
ITIMER_PROF = 0x2,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
};
typedef struct Timespec Timespec;
typedef struct Sigaction Sigaction;
typedef struct Siginfo Siginfo;
typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
#pragma pack on
Timeval it_interval;
Timeval it_value;
};
+struct EpollEvent {
+ uint32 events;
+ uint64 data;
+};
#pragma pack off
// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs.go defs1.go
+// cgo -cdefs defs_linux.go defs1_linux.go
enum {
enum
{
- EAGAIN = 11,
- ENOMEM = 12,
_PAGE_SIZE = 4096,
};
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin
+// +build darwin linux,386 linux,amd64
package net
--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,386 linux,amd64
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+
+int32 runtime·epollcreate(int32 size);
+int32 runtime·epollcreate1(int32 flags);
+int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
+int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+void runtime·closeonexec(int32 fd);
+
+static int32 epfd = -1; // epoll descriptor
+
+void
+runtime·netpollinit(void)
+{
+ epfd = runtime·epollcreate1(EPOLL_CLOEXEC);
+ if(epfd >= 0)
+ return;
+ epfd = runtime·epollcreate(1024);
+ if(epfd >= 0) {
+ runtime·closeonexec(epfd);
+ return;
+ }
+ runtime·printf("netpollinit: failed to create descriptor (%d)\n", -epfd);
+ runtime·throw("netpollinit: failed to create descriptor");
+}
+
+int32
+runtime·netpollopen(int32 fd, PollDesc *pd)
+{
+ EpollEvent ev;
+
+ ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET;
+ ev.data = (uint64)pd;
+ return runtime·epollctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+}
+
+// polls for ready network connections
+// returns list of goroutines that become runnable
+G*
+runtime·netpoll(bool block)
+{
+ EpollEvent events[128], *ev;
+ int32 n, i, waitms, mode;
+ G *gp;
+
+ if(epfd == -1)
+ return nil;
+ waitms = -1;
+ if(!block)
+ waitms = 0;
+retry:
+ n = runtime·epollwait(epfd, events, nelem(events), waitms);
+ if(n < 0) {
+ if(n != -EINTR)
+ runtime·printf("epollwait failed with %d\n", -n);
+ goto retry;
+ }
+ gp = nil;
+ for(i = 0; i < n; i++) {
+ ev = &events[i];
+ if(ev->events == 0)
+ continue;
+ mode = 0;
+ if(ev->events & (EPOLLIN|EPOLLRDHUP|EPOLLHUP|EPOLLERR))
+ mode += 'r';
+ if(ev->events & (EPOLLOUT|EPOLLHUP|EPOLLERR))
+ mode += 'w';
+ if(mode)
+ runtime·netpollready(&gp, (void*)ev->data, mode);
+ }
+ if(block && gp == nil)
+ goto retry;
+ return gp;
+}
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build freebsd linux netbsd openbsd plan9 windows
+// +build freebsd linux,arm netbsd openbsd plan9 windows
#include "runtime.h"
MOVL 12(SP), DX
CALL *runtime·_vdso(SB)
RET
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),7,$0
+ MOVL $254, AX
+ MOVL 4(SP), BX
+ CALL *runtime·_vdso(SB)
+ RET
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),7,$0
+ MOVL $329, AX
+ MOVL 4(SP), BX
+ CALL *runtime·_vdso(SB)
+ RET
+
+// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
+TEXT runtime·epollctl(SB),7,$0
+ MOVL $255, AX
+ MOVL 4(SP), BX
+ MOVL 8(SP), CX
+ MOVL 12(SP), DX
+ MOVL 16(SP), SI
+ CALL *runtime·_vdso(SB)
+ RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),7,$0
+ MOVL $256, AX
+ MOVL 4(SP), BX
+ MOVL 8(SP), CX
+ MOVL 12(SP), DX
+ MOVL 16(SP), SI
+ CALL *runtime·_vdso(SB)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),7,$0
+ MOVL $55, AX // fcntl
+ MOVL 4(SP), BX // fd
+ MOVL $2, CX // F_SETFD
+ MOVL $1, DX // FD_CLOEXEC
+ CALL *runtime·_vdso(SB)
+ RET
MOVL $204, AX // syscall entry
SYSCALL
RET
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),7,$0
+ MOVL 8(SP), DI
+ MOVL $213, AX // syscall entry
+ SYSCALL
+ RET
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),7,$0
+ MOVL 8(SP), DI
+ MOVL $291, AX // syscall entry
+ SYSCALL
+ RET
+
+// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
+TEXT runtime·epollctl(SB),7,$0
+ MOVL 8(SP), DI
+ MOVL 12(SP), SI
+ MOVL 16(SP), DX
+ MOVQ 24(SP), R10
+ MOVL $233, AX // syscall entry
+ SYSCALL
+ RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),7,$0
+ MOVL 8(SP), DI
+ MOVQ 16(SP), SI
+ MOVL 24(SP), DX
+ MOVL 28(SP), R10
+ MOVL $232, AX // syscall entry
+ SYSCALL
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),7,$0
+ MOVL 8(SP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $72, AX // fcntl
+ SYSCALL
+ RET
{
FUTEX_WAIT = 0,
FUTEX_WAKE = 1,
-
- EINTR = 4,
- EAGAIN = 11,
};
// Atomically,