From 2e064cf14441460290fd25d9d61f02a9d0bae671 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Mart=C3=AD?= Date: Thu, 4 Apr 2024 13:05:13 +0900 Subject: [PATCH] encoding/csv: port the go-fuzz function to native fuzzing Beyond the required file move and refactor to use the testing package, a number of changes were made to get the fuzzing working properly. First, add more logs to see what is going on. Second, some option combinations set Comma to the null character, which simply never worked at all. I suspect the author meant to leave the comma character as the default instead. This was spotted thanks to the added logging. Third, the round-trip DeepEqual check did not work at all when any comments were involved, as the writer does not support them. Fourth and last, massage the first and second parsed records before comparing them with DeepEqual, as the nature of Reader and Writer causes empty quoted records and CRLF sequences to change. With all the changes above, the fuzzing function appears to work normally on my laptop now. I fuzzed for a solid five minutes and could no longer encounter any errors or panics. Change-Id: Ie27f65f66099bdaa076343cee18b480803d2e4d3 Reviewed-on: https://go-review.googlesource.com/c/go/+/576375 Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: Joseph Tsai Reviewed-by: Ian Lance Taylor --- src/encoding/csv/fuzz.go | 70 ------------------------- src/encoding/csv/fuzz_test.go | 96 +++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 70 deletions(-) delete mode 100644 src/encoding/csv/fuzz.go create mode 100644 src/encoding/csv/fuzz_test.go diff --git a/src/encoding/csv/fuzz.go b/src/encoding/csv/fuzz.go deleted file mode 100644 index 5f5cdfcbf8..0000000000 --- a/src/encoding/csv/fuzz.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build gofuzz - -package csv - -import ( - "bytes" - "fmt" - "reflect" -) - -func Fuzz(data []byte) int { - score := 0 - buf := new(bytes.Buffer) - - for _, tt := range []Reader{ - {}, - {Comma: ';'}, - {Comma: '\t'}, - {LazyQuotes: true}, - {TrimLeadingSpace: true}, - {Comment: '#'}, - {Comment: ';'}, - } { - r := NewReader(bytes.NewReader(data)) - r.Comma = tt.Comma - r.Comment = tt.Comment - r.LazyQuotes = tt.LazyQuotes - r.TrimLeadingSpace = tt.TrimLeadingSpace - - records, err := r.ReadAll() - if err != nil { - continue - } - score = 1 - - buf.Reset() - w := NewWriter(buf) - w.Comma = tt.Comma - err = w.WriteAll(records) - if err != nil { - fmt.Printf("writer = %#v\n", w) - fmt.Printf("records = %v\n", records) - panic(err) - } - - r = NewReader(buf) - r.Comma = tt.Comma - r.Comment = tt.Comment - r.LazyQuotes = tt.LazyQuotes - r.TrimLeadingSpace = tt.TrimLeadingSpace - result, err := r.ReadAll() - if err != nil { - fmt.Printf("reader = %#v\n", r) - fmt.Printf("records = %v\n", records) - panic(err) - } - - if !reflect.DeepEqual(records, result) { - fmt.Println("records = \n", records) - fmt.Println("result = \n", records) - panic("not equal") - } - } - - return score -} diff --git a/src/encoding/csv/fuzz_test.go b/src/encoding/csv/fuzz_test.go new file mode 100644 index 0000000000..6342fa416d --- /dev/null +++ b/src/encoding/csv/fuzz_test.go @@ -0,0 +1,96 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bytes" + "reflect" + "slices" + "strings" + "testing" +) + +func FuzzRoundtrip(f *testing.F) { + f.Fuzz(func(t *testing.T, in []byte) { + buf := new(bytes.Buffer) + + t.Logf("input = %q", in) + for _, tt := range []Reader{ + {Comma: ','}, + {Comma: ';'}, + {Comma: '\t'}, + {Comma: ',', LazyQuotes: true}, + {Comma: ',', TrimLeadingSpace: true}, + {Comma: ',', Comment: '#'}, + {Comma: ',', Comment: ';'}, + } { + t.Logf("With options:") + t.Logf(" Comma = %q", tt.Comma) + t.Logf(" LazyQuotes = %t", tt.LazyQuotes) + t.Logf(" TrimLeadingSpace = %t", tt.TrimLeadingSpace) + t.Logf(" Comment = %q", tt.Comment) + r := NewReader(bytes.NewReader(in)) + r.Comma = tt.Comma + r.Comment = tt.Comment + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + + records, err := r.ReadAll() + if err != nil { + continue + } + t.Logf("first records = %#v", records) + + buf.Reset() + w := NewWriter(buf) + w.Comma = tt.Comma + err = w.WriteAll(records) + if err != nil { + t.Logf("writer = %#v\n", w) + t.Logf("records = %v\n", records) + t.Fatal(err) + } + if tt.Comment != 0 { + // Writer doesn't support comments, so it can turn the quoted record "#" + // into a non-quoted comment line, failing the roundtrip check below. + continue + } + t.Logf("second input = %q", buf.Bytes()) + + r = NewReader(buf) + r.Comma = tt.Comma + r.Comment = tt.Comment + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + result, err := r.ReadAll() + if err != nil { + t.Logf("reader = %#v\n", r) + t.Logf("records = %v\n", records) + t.Fatal(err) + } + + // The reader turns \r\n into \n. + for _, record := range records { + for i, s := range record { + record[i] = strings.ReplaceAll(s, "\r\n", "\n") + } + } + // Note that the reader parses the quoted record "" as an empty string, + // and the writer turns that into an empty line, which the reader skips over. + // Filter those out to avoid false positives. + records = slices.DeleteFunc(records, func(record []string) bool { + return len(record) == 1 && record[0] == "" + }) + // The reader uses nil when returning no records at all. + if len(records) == 0 { + records = nil + } + + if !reflect.DeepEqual(records, result) { + t.Fatalf("first read got %#v, second got %#v", records, result) + } + } + }) +} -- 2.48.1