From bf0f69220255941196c684f235727fd6dc747b5c Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 14 Jun 2017 19:55:40 -0700 Subject: [PATCH] net: handle spurious netpoll wakeups in connect In some cases the netpoll code can cause a spurious wakeup. This is normally harmless, as the woken up code simply retries the operation. However, for connect, the test we were using to see whether the connect had succeeded (setsockopt(SO_ERROR)) was not reliable in the case of a spurious wakeup. Change to using a reliable test (getpeername). On Darwin we used a different technique: a second call to connect; change Darwin to use getpeername as well. Return the result of getpeername to avoid having to call it twice. Fixes #19289. Change-Id: I119ec8e7a41f482f1e590d4c65a37f6103fa22d9 Reviewed-on: https://go-review.googlesource.com/45815 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/net/fd_unix.go | 40 +++++++++++++++++------------------- src/net/fd_windows.go | 15 +++++++------- src/net/sock_posix.go | 15 +++++++++++--- src/net/tcpsock_unix_test.go | 39 ++++++++++++++++++++++++++++++++++- 4 files changed, 77 insertions(+), 32 deletions(-) diff --git a/src/net/fd_unix.go b/src/net/fd_unix.go index 1122ee4dbe..352010c17d 100644 --- a/src/net/fd_unix.go +++ b/src/net/fd_unix.go @@ -63,7 +63,7 @@ func (fd *netFD) name() string { return fd.net + ":" + ls + "->" + rs } -func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret error) { +func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (rsa syscall.Sockaddr, ret error) { // Do not need to call fd.writeLock here, // because fd is not yet accessible to user, // so no concurrent operations are possible. @@ -72,14 +72,14 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro case nil, syscall.EISCONN: select { case <-ctx.Done(): - return mapErr(ctx.Err()) + return nil, mapErr(ctx.Err()) default: } if err := fd.pfd.Init(fd.net, true); err != nil { - return err + return nil, err } runtime.KeepAlive(fd) - return nil + return nil, nil case syscall.EINVAL: // On Solaris we can see EINVAL if the socket has // already been accepted and closed by the server. @@ -87,14 +87,14 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro // the socket will see EOF. For details and a test // case in C see https://golang.org/issue/6828. if runtime.GOOS == "solaris" { - return nil + return nil, nil } fallthrough default: - return os.NewSyscallError("connect", err) + return nil, os.NewSyscallError("connect", err) } if err := fd.pfd.Init(fd.net, true); err != nil { - return err + return nil, err } if deadline, _ := ctx.Deadline(); !deadline.IsZero() { fd.pfd.SetWriteDeadline(deadline) @@ -152,30 +152,28 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro if err := fd.pfd.WaitWrite(); err != nil { select { case <-ctx.Done(): - return mapErr(ctx.Err()) + return nil, mapErr(ctx.Err()) default: } - return err + return nil, err } nerr, err := getsockoptIntFunc(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_ERROR) if err != nil { - return os.NewSyscallError("getsockopt", err) + return nil, os.NewSyscallError("getsockopt", err) } switch err := syscall.Errno(nerr); err { case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR: - case syscall.Errno(0), syscall.EISCONN: - if runtime.GOOS != "darwin" { - return nil - } - // See golang.org/issue/14548. - // On Darwin, multiple connect system calls on - // a non-blocking socket never harm SO_ERROR. - switch err := connectFunc(fd.pfd.Sysfd, ra); err { - case nil, syscall.EISCONN: - return nil + case syscall.EISCONN: + return nil, nil + case syscall.Errno(0): + // The runtime poller can wake us up spuriously; + // see issues 14548 and 19289. Check that we are + // really connected; if not, wait again. + if rsa, err := syscall.Getpeername(fd.pfd.Sysfd); err == nil { + return rsa, nil } default: - return os.NewSyscallError("getsockopt", err) + return nil, os.NewSyscallError("getsockopt", err) } runtime.KeepAlive(fd) } diff --git a/src/net/fd_windows.go b/src/net/fd_windows.go index 0e5d37ab09..c2156b255e 100644 --- a/src/net/fd_windows.go +++ b/src/net/fd_windows.go @@ -65,12 +65,13 @@ func (fd *netFD) setAddr(laddr, raddr Addr) { runtime.SetFinalizer(fd, (*netFD).Close) } -func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error { +// Always returns nil for connected peer address result. +func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (syscall.Sockaddr, error) { // Do not need to call fd.writeLock here, // because fd is not yet accessible to user, // so no concurrent operations are possible. if err := fd.init(); err != nil { - return err + return nil, err } if deadline, ok := ctx.Deadline(); ok && !deadline.IsZero() { fd.pfd.SetWriteDeadline(deadline) @@ -78,7 +79,7 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error { } if !canUseConnectEx(fd.net) { err := connectFunc(fd.pfd.Sysfd, ra) - return os.NewSyscallError("connect", err) + return nil, os.NewSyscallError("connect", err) } // ConnectEx windows API requires an unconnected, previously bound socket. if la == nil { @@ -91,7 +92,7 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error { panic("unexpected type in connect") } if err := syscall.Bind(fd.pfd.Sysfd, la); err != nil { - return os.NewSyscallError("bind", err) + return nil, os.NewSyscallError("bind", err) } } @@ -115,16 +116,16 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error { if err := fd.pfd.ConnectEx(ra); err != nil { select { case <-ctx.Done(): - return mapErr(ctx.Err()) + return nil, mapErr(ctx.Err()) default: if _, ok := err.(syscall.Errno); ok { err = os.NewSyscallError("connectex", err) } - return err + return nil, err } } // Refresh socket properties. - return os.NewSyscallError("setsockopt", syscall.Setsockopt(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_UPDATE_CONNECT_CONTEXT, (*byte)(unsafe.Pointer(&fd.pfd.Sysfd)), int32(unsafe.Sizeof(fd.pfd.Sysfd)))) + return nil, os.NewSyscallError("setsockopt", syscall.Setsockopt(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_UPDATE_CONNECT_CONTEXT, (*byte)(unsafe.Pointer(&fd.pfd.Sysfd)), int32(unsafe.Sizeof(fd.pfd.Sysfd)))) } func (fd *netFD) Close() error { diff --git a/src/net/sock_posix.go b/src/net/sock_posix.go index 8985f8f23f..8cfc42eb7e 100644 --- a/src/net/sock_posix.go +++ b/src/net/sock_posix.go @@ -133,12 +133,13 @@ func (fd *netFD) dial(ctx context.Context, laddr, raddr sockaddr) error { } } } - var rsa syscall.Sockaddr + var rsa syscall.Sockaddr // remote address from the user + var crsa syscall.Sockaddr // remote address we actually connected to if raddr != nil { if rsa, err = raddr.sockaddr(fd.family); err != nil { return err } - if err := fd.connect(ctx, lsa, rsa); err != nil { + if crsa, err = fd.connect(ctx, lsa, rsa); err != nil { return err } fd.isConnected = true @@ -147,8 +148,16 @@ func (fd *netFD) dial(ctx context.Context, laddr, raddr sockaddr) error { return err } } + // Record the local and remote addresses from the actual socket. + // Get the local address by calling Getsockname. + // For the remote address, use + // 1) the one returned by the connect method, if any; or + // 2) the one from Getpeername, if it succeeds; or + // 3) the one passed to us as the raddr parameter. lsa, _ = syscall.Getsockname(fd.pfd.Sysfd) - if rsa, _ = syscall.Getpeername(fd.pfd.Sysfd); rsa != nil { + if crsa != nil { + fd.setAddr(fd.addrFunc()(lsa), fd.addrFunc()(crsa)) + } else if rsa, _ = syscall.Getpeername(fd.pfd.Sysfd); rsa != nil { fd.setAddr(fd.addrFunc()(lsa), fd.addrFunc()(rsa)) } else { fd.setAddr(fd.addrFunc()(lsa), raddr) diff --git a/src/net/tcpsock_unix_test.go b/src/net/tcpsock_unix_test.go index 2375fe24dc..3af1834455 100644 --- a/src/net/tcpsock_unix_test.go +++ b/src/net/tcpsock_unix_test.go @@ -2,11 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin +// +build !plan9,!windows package net import ( + "context" + "internal/testenv" + "math/rand" "runtime" "sync" "syscall" @@ -77,3 +80,37 @@ func TestTCPSpuriousConnSetupCompletion(t *testing.T) { ln.Close() wg.Wait() } + +// Issue 19289. +// Test that a canceled Dial does not cause a subsequent Dial to succeed. +func TestTCPSpuriousConnSetupCompletionWithCancel(t *testing.T) { + if testenv.Builder() == "" { + testenv.MustHaveExternalNetwork(t) + } + t.Parallel() + const tries = 10000 + var wg sync.WaitGroup + wg.Add(tries * 2) + sem := make(chan bool, 5) + for i := 0; i < tries; i++ { + sem <- true + ctx, cancel := context.WithCancel(context.Background()) + go func() { + defer wg.Done() + time.Sleep(time.Duration(rand.Int63n(int64(5 * time.Millisecond)))) + cancel() + }() + go func(i int) { + defer wg.Done() + var dialer Dialer + // Try to connect to a real host on a port + // that it is not listening on. + _, err := dialer.DialContext(ctx, "tcp", "golang.org:3") + if err == nil { + t.Errorf("Dial to unbound port succeeded on attempt %d", i) + } + <-sem + }(i) + } + wg.Wait() +} -- 2.50.0