From fdc21f3eafe94490e55e0bf018490b3aa9ba2383 Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Wed, 15 Nov 2023 09:45:16 -0800 Subject: [PATCH] net/http: don't set length for non-range encoded content requests Historically, serveContent has not set Content-Length when the user provides Content-Encoding. This causes broken responses when the user sets both Content-Length and Content-Encoding, and the request is a range request, because the returned data doesn't match the declared length. CL 381956 fixed this case by changing serveContent to always set a Content-Length header. Unfortunately, I've discovered multiple cases in the wild of users setting Content-Encoding: gzip and passing serveContent a ResponseWriter wrapper that gzips the data written to it. This breaks serveContent in a number of ways. In particular, there's no way for it to respond to Range requests properly, because it doesn't know the recipient's view of the content. What the user should be doing in this case is just using io.Copy to send the gzipped data to the response. Or possibly setting Transfer-Encoding: gzip. But whatever they should be doing, what they are doing has mostly worked for non-Range requests, and setting Content-Length makes it stop working because the length of the file being served doesn't match the number of bytes being sent. So in the interests of not breaking users (even if they're misusing serveContent in ways that are already broken), partially revert CL 381956. For non-Range requests, don't set Content-Length when the user has set Content-Encoding. This matches our previous behavior and causes minimal harm in cases where we could have set Content-Length. (We will send using chunked encoding rather than identity, but that's fine.) For Range requests, set Content-Length unconditionally. Either the user isn't mangling the data in the ResponseWriter, in which case the length is correct, or they are, in which case the response isn't going to contain the right bytes anyway. (Note that a Range request for a Content-Length: gzip file is requesting a range of *gzipped* bytes, not a range from the uncompressed file.) Change-Id: I5e788e6756f34cee520aa7c456826f462a59f7eb Reviewed-on: https://go-review.googlesource.com/c/go/+/542595 Auto-Submit: Damien Neil LUCI-TryBot-Result: Go LUCI Reviewed-by: Jonathan Amsterdam --- src/net/http/fs.go | 29 ++++++++++++++++++- src/net/http/fs_test.go | 63 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/net/http/fs.go b/src/net/http/fs.go index 20da56001c..ace74a7b80 100644 --- a/src/net/http/fs.go +++ b/src/net/http/fs.go @@ -343,8 +343,35 @@ func serveContent(w ResponseWriter, r *Request, name string, modtime time.Time, } w.Header().Set("Accept-Ranges", "bytes") - w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + // We should be able to unconditionally set the Content-Length here. + // + // However, there is a pattern observed in the wild that this breaks: + // The user wraps the ResponseWriter in one which gzips data written to it, + // and sets "Content-Encoding: gzip". + // + // The user shouldn't be doing this; the serveContent path here depends + // on serving seekable data with a known length. If you want to compress + // on the fly, then you shouldn't be using ServeFile/ServeContent, or + // you should compress the entire file up-front and provide a seekable + // view of the compressed data. + // + // However, since we've observed this pattern in the wild, and since + // setting Content-Length here breaks code that mostly-works today, + // skip setting Content-Length if the user set Content-Encoding. + // + // If this is a range request, always set Content-Length. + // If the user isn't changing the bytes sent in the ResponseWrite, + // the Content-Length will be correct. + // If the user is changing the bytes sent, then the range request wasn't + // going to work properly anyway and we aren't worse off. + // + // A possible future improvement on this might be to look at the type + // of the ResponseWriter, and always set Content-Length if it's one + // that we recognize. + if len(ranges) > 0 || w.Header().Get("Content-Encoding") == "" { + w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + } w.WriteHeader(code) if r.Method != "HEAD" { diff --git a/src/net/http/fs_test.go b/src/net/http/fs_test.go index d29664c16a..861e70caf2 100644 --- a/src/net/http/fs_test.go +++ b/src/net/http/fs_test.go @@ -7,6 +7,7 @@ package http_test import ( "bufio" "bytes" + "compress/gzip" "errors" "fmt" "internal/testenv" @@ -15,6 +16,7 @@ import ( "mime" "mime/multipart" "net" + "net/http" . "net/http" "net/http/httptest" "net/url" @@ -571,7 +573,7 @@ func testServeDirWithoutTrailingSlash(t *testing.T, mode testMode) { } } -// Tests that ServeFile adds a Content-Length even if a Content-Encoding is +// Tests that ServeFile doesn't add a Content-Length if a Content-Encoding is // specified. func TestServeFileWithContentEncoding(t *testing.T) { run(t, testServeFileWithContentEncoding) } func testServeFileWithContentEncoding(t *testing.T, mode testMode) { @@ -593,7 +595,7 @@ func testServeFileWithContentEncoding(t *testing.T, mode testMode) { t.Fatal(err) } resp.Body.Close() - if g, e := resp.ContentLength, int64(11); g != e { + if g, e := resp.ContentLength, int64(-1); g != e { t.Errorf("Content-Length mismatch: got %d, want %d", g, e) } } @@ -1609,3 +1611,60 @@ func TestServeFileFS(t *testing.T) { } res.Body.Close() } + +func TestServeFileZippingResponseWriter(t *testing.T) { + // This test exercises a pattern which is incorrect, + // but has been observed enough in the world that we don't want to break it. + // + // The server is setting "Content-Encoding: gzip", + // wrapping the ResponseWriter in an implementation which gzips data written to it, + // and passing this ResponseWriter to ServeFile. + // + // This means ServeFile cannot properly set a Content-Length header, because it + // doesn't know what content it is going to send--the ResponseWriter is modifying + // the bytes sent. + // + // Range requests are always going to be broken in this scenario, + // but verify that we can serve non-range requests correctly. + filename := "index.html" + contents := []byte("contents will be sent with Content-Encoding: gzip") + fsys := fstest.MapFS{ + filename: {Data: contents}, + } + ts := newClientServerTest(t, http1Mode, HandlerFunc(func(w ResponseWriter, r *Request) { + w.Header().Set("Content-Encoding", "gzip") + gzw := gzip.NewWriter(w) + defer gzw.Close() + ServeFileFS(gzipResponseWriter{w: gzw, ResponseWriter: w}, r, fsys, filename) + })).ts + defer ts.Close() + + res, err := ts.Client().Get(ts.URL + "/" + filename) + if err != nil { + t.Fatal(err) + } + b, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal("reading Body:", err) + } + if s := string(b); s != string(contents) { + t.Errorf("for path %q got %q, want %q", filename, s, contents) + } + res.Body.Close() +} + +type gzipResponseWriter struct { + ResponseWriter + w *gzip.Writer +} + +func (grw gzipResponseWriter) Write(b []byte) (int, error) { + return grw.w.Write(b) +} + +func (grw gzipResponseWriter) Flush() { + grw.w.Flush() + if fw, ok := grw.ResponseWriter.(http.Flusher); ok { + fw.Flush() + } +} -- 2.48.1