]> Cypherpunks repositories - gostls13.git/commitdiff
encoding/csv: port the go-fuzz function to native fuzzing
authorDaniel Martí <mvdan@mvdan.cc>
Thu, 4 Apr 2024 04:05:13 +0000 (13:05 +0900)
committerDaniel Martí <mvdan@mvdan.cc>
Fri, 5 Apr 2024 01:26:13 +0000 (01:26 +0000)
Beyond the required file move and refactor to use the testing package,
a number of changes were made to get the fuzzing working properly.

First, add more logs to see what is going on.

Second, some option combinations set Comma to the null character,
which simply never worked at all. I suspect the author meant to leave
the comma character as the default instead.
This was spotted thanks to the added logging.

Third, the round-trip DeepEqual check did not work at all
when any comments were involved, as the writer does not support them.

Fourth and last, massage the first and second parsed records before
comparing them with DeepEqual, as the nature of Reader and Writer
causes empty quoted records and CRLF sequences to change.

With all the changes above, the fuzzing function appears to work
normally on my laptop now. I fuzzed for a solid five minutes and
could no longer encounter any errors or panics.

Change-Id: Ie27f65f66099bdaa076343cee18b480803d2e4d3
Reviewed-on: https://go-review.googlesource.com/c/go/+/576375
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@google.com>
src/encoding/csv/fuzz.go [deleted file]
src/encoding/csv/fuzz_test.go [new file with mode: 0644]

diff --git a/src/encoding/csv/fuzz.go b/src/encoding/csv/fuzz.go
deleted file mode 100644 (file)
index 5f5cdfc..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gofuzz
-
-package csv
-
-import (
-       "bytes"
-       "fmt"
-       "reflect"
-)
-
-func Fuzz(data []byte) int {
-       score := 0
-       buf := new(bytes.Buffer)
-
-       for _, tt := range []Reader{
-               {},
-               {Comma: ';'},
-               {Comma: '\t'},
-               {LazyQuotes: true},
-               {TrimLeadingSpace: true},
-               {Comment: '#'},
-               {Comment: ';'},
-       } {
-               r := NewReader(bytes.NewReader(data))
-               r.Comma = tt.Comma
-               r.Comment = tt.Comment
-               r.LazyQuotes = tt.LazyQuotes
-               r.TrimLeadingSpace = tt.TrimLeadingSpace
-
-               records, err := r.ReadAll()
-               if err != nil {
-                       continue
-               }
-               score = 1
-
-               buf.Reset()
-               w := NewWriter(buf)
-               w.Comma = tt.Comma
-               err = w.WriteAll(records)
-               if err != nil {
-                       fmt.Printf("writer  = %#v\n", w)
-                       fmt.Printf("records = %v\n", records)
-                       panic(err)
-               }
-
-               r = NewReader(buf)
-               r.Comma = tt.Comma
-               r.Comment = tt.Comment
-               r.LazyQuotes = tt.LazyQuotes
-               r.TrimLeadingSpace = tt.TrimLeadingSpace
-               result, err := r.ReadAll()
-               if err != nil {
-                       fmt.Printf("reader  = %#v\n", r)
-                       fmt.Printf("records = %v\n", records)
-                       panic(err)
-               }
-
-               if !reflect.DeepEqual(records, result) {
-                       fmt.Println("records = \n", records)
-                       fmt.Println("result  = \n", records)
-                       panic("not equal")
-               }
-       }
-
-       return score
-}
diff --git a/src/encoding/csv/fuzz_test.go b/src/encoding/csv/fuzz_test.go
new file mode 100644 (file)
index 0000000..6342fa4
--- /dev/null
@@ -0,0 +1,96 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package csv
+
+import (
+       "bytes"
+       "reflect"
+       "slices"
+       "strings"
+       "testing"
+)
+
+func FuzzRoundtrip(f *testing.F) {
+       f.Fuzz(func(t *testing.T, in []byte) {
+               buf := new(bytes.Buffer)
+
+               t.Logf("input = %q", in)
+               for _, tt := range []Reader{
+                       {Comma: ','},
+                       {Comma: ';'},
+                       {Comma: '\t'},
+                       {Comma: ',', LazyQuotes: true},
+                       {Comma: ',', TrimLeadingSpace: true},
+                       {Comma: ',', Comment: '#'},
+                       {Comma: ',', Comment: ';'},
+               } {
+                       t.Logf("With options:")
+                       t.Logf("  Comma            = %q", tt.Comma)
+                       t.Logf("  LazyQuotes       = %t", tt.LazyQuotes)
+                       t.Logf("  TrimLeadingSpace = %t", tt.TrimLeadingSpace)
+                       t.Logf("  Comment          = %q", tt.Comment)
+                       r := NewReader(bytes.NewReader(in))
+                       r.Comma = tt.Comma
+                       r.Comment = tt.Comment
+                       r.LazyQuotes = tt.LazyQuotes
+                       r.TrimLeadingSpace = tt.TrimLeadingSpace
+
+                       records, err := r.ReadAll()
+                       if err != nil {
+                               continue
+                       }
+                       t.Logf("first records = %#v", records)
+
+                       buf.Reset()
+                       w := NewWriter(buf)
+                       w.Comma = tt.Comma
+                       err = w.WriteAll(records)
+                       if err != nil {
+                               t.Logf("writer  = %#v\n", w)
+                               t.Logf("records = %v\n", records)
+                               t.Fatal(err)
+                       }
+                       if tt.Comment != 0 {
+                               // Writer doesn't support comments, so it can turn the quoted record "#"
+                               // into a non-quoted comment line, failing the roundtrip check below.
+                               continue
+                       }
+                       t.Logf("second input = %q", buf.Bytes())
+
+                       r = NewReader(buf)
+                       r.Comma = tt.Comma
+                       r.Comment = tt.Comment
+                       r.LazyQuotes = tt.LazyQuotes
+                       r.TrimLeadingSpace = tt.TrimLeadingSpace
+                       result, err := r.ReadAll()
+                       if err != nil {
+                               t.Logf("reader  = %#v\n", r)
+                               t.Logf("records = %v\n", records)
+                               t.Fatal(err)
+                       }
+
+                       // The reader turns \r\n into \n.
+                       for _, record := range records {
+                               for i, s := range record {
+                                       record[i] = strings.ReplaceAll(s, "\r\n", "\n")
+                               }
+                       }
+                       // Note that the reader parses the quoted record "" as an empty string,
+                       // and the writer turns that into an empty line, which the reader skips over.
+                       // Filter those out to avoid false positives.
+                       records = slices.DeleteFunc(records, func(record []string) bool {
+                               return len(record) == 1 && record[0] == ""
+                       })
+                       // The reader uses nil when returning no records at all.
+                       if len(records) == 0 {
+                               records = nil
+                       }
+
+                       if !reflect.DeepEqual(records, result) {
+                               t.Fatalf("first read got %#v, second got %#v", records, result)
+                       }
+               }
+       })
+}