Cypherpunks repositories - gostls13.git/commitdiff
net/http: handle close/response race more gracefully
author     Daniel Morsing <daniel.morsing@gmail.com>
           Thu, 18 Dec 2014 14:05:48 +0000 (15:05 +0100)
committer  Brad Fitzpatrick <bradfitz@golang.org>
           Thu, 9 Apr 2015 13:46:50 +0000 (13:46 +0000)
There was a logical race in Transport.RoundTrip where a roundtrip with
a pending response would race with the channel for the connection
closing. This usually happened for responses with Connection: close
and no body.
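
To make the race concrete, here is a minimal standalone sketch; the
names (roundTripSketch, resc, closech) and the string payload are
illustrative stand-ins for the transport's internals, not the real API.
When both channels are ready at once, select picks a case
pseudo-randomly, so the close case can win even though a response is
already buffered:

    package main

    import (
            "errors"
            "fmt"
    )

    // roundTripSketch mimics the shape of the racy select: a response
    // channel and a connection-close channel competing in one select.
    func roundTripSketch(resc chan string, closech chan struct{}) (string, error) {
            select {
            case resp := <-resc:
                    return resp, nil
            case <-closech:
                    // A response may already be buffered in resc, but this
                    // case can still win the select.
                    return "", errors.New("transport closed before response was received")
            }
    }

    func main() {
            resc := make(chan string, 1)
            closech := make(chan struct{})
            resc <- "200 OK" // the read loop already delivered a response...
            close(closech)   // ...but the connection is torn down at the same time
            // Sometimes prints "200 OK <nil>", sometimes the closed-connection error.
            fmt.Println(roundTripSketch(resc, closech))
    }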

We handled this race by reading the close channel, setting a 100ms
timer, and returning an error if no response had arrived by the time
the timer fired.

This put a lower bound on how fast a connection could fail. We couldn't
fail a request faster than 100ms.
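
A rough restatement of that workaround, using the same simplified
stand-in channels as the sketch above (the real code is in the lines
removed from transport.go below). After seeing the close, it arms a
100ms timer and only errors out when the timer fires, so no such
failure can complete in under 100ms:

    package main

    import (
            "errors"
            "fmt"
            "time"
    )

    // oldRoundTripSketch restates the removed workaround: once the close is
    // observed, arm a 100ms timer and only give up when it fires.
    func oldRoundTripSketch(resc chan string, closech chan struct{}) (string, error) {
            var failTicker <-chan time.Time
            for {
                    select {
                    case resp := <-resc:
                            return resp, nil
                    case <-closech:
                            closech = nil                                   // avoid spinning on the closed channel
                            failTicker = time.After(100 * time.Millisecond) // arbitrary wait for a late response
                    case <-failTicker:
                            return "", errors.New("transport closed before response was received")
                    }
            }
    }

    func main() {
            resc := make(chan string, 1)
            closech := make(chan struct{})
            close(closech) // the connection died and no response is coming
            start := time.Now()
            _, err := oldRoundTripSketch(resc, closech)
            fmt.Println(err, "after", time.Since(start)) // never faster than ~100ms
    }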

Reordering the channel operations gets rid of the logical race. If
the readLoop causes the connection to be closed, it will already have
put its response into the return channel, so we can fetch it with a
non-blocking receive.
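
Sketched with the same stand-in channels, the reordered scheme looks
roughly like this: the sending side delivers any pending response
before the close channel fires, so the receiving side can recover it
with a non-blocking receive instead of a timer:

    package main

    import (
            "errors"
            "fmt"
    )

    // newRoundTripSketch mirrors the reordered handling: because the sender
    // delivers any pending response before the close channel fires, a
    // non-blocking receive on resc is enough to recover it.
    func newRoundTripSketch(resc chan string, closech chan struct{}) (string, error) {
            select {
            case resp := <-resc:
                    return resp, nil
            case <-closech:
                    select {
                    case resp := <-resc:
                            return resp, nil // a response was pending after all
                    default:
                            return "", errors.New("transport closed before response was received")
                    }
            }
    }

    func main() {
            resc := make(chan string, 1) // buffered, like the transport's response channel
            closech := make(chan struct{})
            resc <- "200 OK" // the sender delivers the response first...
            close(closech)   // ...and only then signals that the connection is gone
            fmt.Println(newRoundTripSketch(resc, closech)) // always recovers the response
    }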

Change-Id: Idf09e48d7a0453d7de0120d3055d0ce5893a5428
Reviewed-on: https://go-review.googlesource.com/1787
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/net/http/export_test.go
src/net/http/transport.go
src/net/http/transport_test.go

diff --git a/src/net/http/export_test.go b/src/net/http/export_test.go
index 87b6c0773aa5cce710fd9130796827deac53062b..e0bbc8067047597d0d0c8ee9e4555cca56b89468 100644
--- a/src/net/http/export_test.go
+++ b/src/net/http/export_test.go
@@ -78,6 +78,10 @@ func (t *Transport) PutIdleTestConn() bool {
        })
 }
 
+func SetInstallConnClosedHook(f func()) {
+       testHookPersistConnClosedGotRes = f
+}
+
 func NewTestTimeoutHandler(handler Handler, ch <-chan time.Time) Handler {
        f := func() <-chan time.Time {
                return ch
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index b18e445cbc1ca6f0475d7b166504da8ae88a625f..2528b8e1cd43bd00f702f7e66e484def8382195a 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -914,14 +914,22 @@ func (pc *persistConn) readLoop() {
                        }
                }
 
+               // The connection might be going away when we put the
+               // idleConn below. When that happens, we close the response channel to signal
+               // to roundTrip that the connection is gone. roundTrip waits for
+               // both closing and a response in a select, so it might choose
+               // the close channel, rather than the response.
+               // We send the response first so that roundTrip can check
+               // if there is a pending one with a non-blocking select
+               // on the response channel before erroring out.
+               rc.ch <- responseAndError{resp, err}
+
                if alive && !hasBody {
                        alive = !pc.sawEOF &&
                                pc.wroteRequest() &&
                                pc.t.putIdleConn(pc)
                }
 
-               rc.ch <- responseAndError{resp, err}
-
                // Wait for the just-returned response body to be fully consumed
                // before we race and peek on the underlying bufio reader.
                if waitForBodyRead != nil {
@@ -1028,6 +1036,8 @@ func (e *httpError) Temporary() bool { return true }
 var errTimeout error = &httpError{err: "net/http: timeout awaiting response headers", timeout: true}
 var errClosed error = &httpError{err: "net/http: transport closed before response was received"}
 
+var testHookPersistConnClosedGotRes func() // nil except for tests
+
 func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err error) {
        pc.t.setReqCanceler(req.Request, pc.cancelRequest)
        pc.lk.Lock()
@@ -1078,8 +1088,6 @@ func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err err
        pc.reqch <- requestAndChan{req.Request, resc, requestedGzip}
 
        var re responseAndError
-       var pconnDeadCh = pc.closech
-       var failTicker <-chan time.Time
        var respHeaderTimer <-chan time.Time
 WaitResponse:
        for {
@@ -1095,23 +1103,25 @@ WaitResponse:
                                defer timer.Stop() // prevent leaks
                                respHeaderTimer = timer.C
                        }
-               case <-pconnDeadCh:
+               case <-pc.closech:
                        // The persist connection is dead. This shouldn't
                        // usually happen (only with Connection: close responses
                        // with no response bodies), but if it does happen it
                        // means either a) the remote server hung up on us
                        // prematurely, or b) the readLoop sent us a response &
                        // closed its closech at roughly the same time, and we
-                       // selected this case first, in which case a response
-                       // might still be coming soon.
-                       //
-                       // We can't avoid the select race in b) by using a unbuffered
-                       // resc channel instead, because then goroutines can
-                       // leak if we exit due to other errors.
-                       pconnDeadCh = nil                               // avoid spinning
-                       failTicker = time.After(100 * time.Millisecond) // arbitrary time to wait for resc
-               case <-failTicker:
-                       re = responseAndError{err: errClosed}
+                       // selected this case first. If we got a response, readLoop makes sure
+                       // to send it before it puts the conn and closes the channel.
+                       // That way, we can fetch the response, if there is one,
+                       // with a non-blocking receive.
+                       select {
+                       case re = <-resc:
+                               if fn := testHookPersistConnClosedGotRes; fn != nil {
+                                       fn()
+                               }
+                       default:
+                               re = responseAndError{err: errClosed}
+                       }
                        break WaitResponse
                case <-respHeaderTimer:
                        pc.close()
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 504a6a7b56440593a04ff82f3cba9175dce9612f..b56defdc078156762701cc8cb76c102c9c85e1b5 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -2275,6 +2275,52 @@ func TestTransportRangeAndGzip(t *testing.T) {
        res.Body.Close()
 }
 
+// Previously, we used to handle a logical race within RoundTrip by waiting for 100ms
+// in the case of an error. Changing the order of the channel operations got rid of this
+// race.
+//
+// In order to test that the channel op reordering works, we install a hook into the
+// roundTrip function which gets called if we saw the connection go away and
+// we subsequently received a response.
+func TestTransportResponseCloseRace(t *testing.T) {
+       if testing.Short() {
+               t.Skip("skipping in short mode")
+       }
+       defer afterTest(t)
+
+       ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+       }))
+       defer ts.Close()
+       sawRace := false
+       SetInstallConnClosedHook(func() {
+               sawRace = true
+       })
+       defer SetInstallConnClosedHook(nil)
+       tr := &Transport{
+               DisableKeepAlives: true,
+       }
+       req, err := NewRequest("GET", ts.URL, nil)
+       if err != nil {
+               t.Fatal(err)
+       }
+       // selects are not deterministic, so do this a bunch
+       // and see if we handle the logical race at least once.
+       for i := 0; i < 10000; i++ {
+               resp, err := tr.RoundTrip(req)
+               if err != nil {
+                       t.Fatalf("unexpected error: %s", err)
+                       continue
+               }
+               resp.Body.Close()
+               if sawRace {
+                       break
+               }
+       }
+       if !sawRace {
+               t.Errorf("didn't see response/connection going away race")
+       }
+}
+
 func wantBody(res *http.Response, err error, want string) error {
        if err != nil {
                return err