From 2f072cf8a975afb082b40cb29238ce536b5ae9b6 Mon Sep 17 00:00:00 2001 From: Katie Hockman Date: Tue, 5 Jan 2021 16:01:49 -0500 Subject: [PATCH] [dev.fuzz] internal/fuzz: implement a more robust mutator This change also allocates a larger capacity (100 MB) for the shared memory at the start, rather than beginning as small as possible and immediately needing to grow while mutating. This means that 100 MB is the maximum size of a corpus entry currently, since growing the shared memory is not yet supported. The code in internal/fuzz/mutator.go and internal/fuzz/pcg.go are copied from, or heavily inspired by, code originally authored by Dmitry Vyukov and Josh Bleecher Snyder as part of the go-fuzz project. Thanks to them for their contributions. See https://github.com/dvyukov/go-fuzz. Change-Id: I0d51d53976e23933072e760ff78e6c4ad9dcd862 Reviewed-on: https://go-review.googlesource.com/c/go/+/281972 Run-TryBot: Katie Hockman TryBot-Result: Go Bot Reviewed-by: Jay Conrod Trust: Katie Hockman --- .../script/test_fuzz_mutate_crash.txt | 66 ++++- ..._fuzz_mutate.txt => test_fuzz_mutator.txt} | 45 +++- src/internal/fuzz/fuzz.go | 11 +- src/internal/fuzz/mem.go | 9 +- src/internal/fuzz/mutator.go | 244 +++++++++++++++++- src/internal/fuzz/pcg.go | 104 ++++++++ src/internal/fuzz/worker.go | 9 +- 7 files changed, 454 insertions(+), 34 deletions(-) rename src/cmd/go/testdata/script/{test_fuzz_mutate.txt => test_fuzz_mutator.txt} (73%) create mode 100644 src/internal/fuzz/pcg.go diff --git a/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt b/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt index 6816950265..2d5e1e5fd7 100644 --- a/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt +++ b/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt @@ -13,23 +13,23 @@ go test -parallel=1 # Running the fuzzer should find a crashing input quickly. ! go test -fuzz=FuzzWithBug -fuzztime=5s -parallel=1 -stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603' +stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]' stdout 'this input caused a crash!' -grep '\Aab\z' testdata/corpus/FuzzWithBug/fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603 +go run check_testdata.go FuzzWithBug # Now, the failing bytes should have been added to the seed corpus for # the target, and should fail when run without fuzzing. ! go test -parallel=1 ! go test -run=FuzzWithNilPanic -fuzz=FuzzWithNilPanic -fuzztime=5s -parallel=1 -stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1' +stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]' stdout 'runtime.Goexit' -grep '\Aac\z' testdata/corpus/FuzzWithNilPanic/f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1 +go run check_testdata.go FuzzWithNilPanic ! go test -run=FuzzWithBadExit -fuzz=FuzzWithBadExit -fuzztime=5s -parallel=1 -stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67' +stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]' stdout 'unexpectedly' -grep '\Aad\z' testdata/corpus/FuzzWithBadExit/70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67 +go run check_testdata.go FuzzWithBadExit -- go.mod -- module m @@ -39,7 +39,6 @@ go 1.16 package fuzz_crash import ( - "bytes" "os" "testing" ) @@ -47,7 +46,7 @@ import ( func FuzzWithBug(f *testing.F) { f.Add([]byte("aa")) f.Fuzz(func(t *testing.T, b []byte) { - if bytes.Equal(b, []byte("ab")) { + if string(b) != "aa" { panic("this input caused a crash!") } }) @@ -56,7 +55,7 @@ func FuzzWithBug(f *testing.F) { func FuzzWithNilPanic(f *testing.F) { f.Add([]byte("aa")) f.Fuzz(func(t *testing.T, b []byte) { - if bytes.Equal(b, []byte("ac")) { + if string(b) != "aa" { panic(nil) } }) @@ -65,8 +64,55 @@ func FuzzWithNilPanic(f *testing.F) { func FuzzWithBadExit(f *testing.F) { f.Add([]byte("aa")) f.Fuzz(func(t *testing.T, b []byte) { - if bytes.Equal(b, []byte("ad")) { + if string(b) != "aa" { os.Exit(1) } }) +} + +-- check_testdata.go -- +// +build ignore + +package main + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io/ioutil" + "os" + "path/filepath" +) + +func main() { + target := os.Args[1] + dir := filepath.Join("testdata/corpus", target) + + files, err := ioutil.ReadDir(dir) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + if len(files) != 1 { + fmt.Fprintln(os.Stderr, fmt.Errorf("expect only one new mutation to be written to testdata", len(files))) + os.Exit(1) + } + + fname := files[0].Name() + contents, err := ioutil.ReadFile(filepath.Join(dir, fname)) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if bytes.Equal(contents, []byte("aa")) { + fmt.Fprintln(os.Stderr, fmt.Errorf("newly written testdata entry was not mutated")) + os.Exit(1) + } + // The hash of the bytes in the file should match the filename. + h := []byte(fmt.Sprintf("%x", sha256.Sum256(contents))) + if !bytes.Equal([]byte(fname), h) { + fmt.Fprintln(os.Stderr, fmt.Errorf("hash of bytes %q does not match filename %q", h, fname)) + os.Exit(1) + } } \ No newline at end of file diff --git a/src/cmd/go/testdata/script/test_fuzz_mutate.txt b/src/cmd/go/testdata/script/test_fuzz_mutator.txt similarity index 73% rename from src/cmd/go/testdata/script/test_fuzz_mutate.txt rename to src/cmd/go/testdata/script/test_fuzz_mutator.txt index cbd0838e73..f858dcf354 100644 --- a/src/cmd/go/testdata/script/test_fuzz_mutate.txt +++ b/src/cmd/go/testdata/script/test_fuzz_mutator.txt @@ -10,6 +10,12 @@ go test -fuzz=FuzzA -fuzztime=5s -parallel=1 -log=fuzz go run check_logs.go fuzz fuzz.worker +# Test that the mutator is good enough to find several unique mutations. +! go test -v -fuzz=Fuzz -parallel=1 -fuzztime=30s mutator_test.go +! stdout ok +stdout FAIL +stdout 'mutator found enough edge cases' + -- go.mod -- module m @@ -143,7 +149,7 @@ func checkWorkerLog(r io.Reader) error { sawAMutant = true } } - if err := scan.Err(); err != nil { + if err := scan.Err(); err != nil && err != bufio.ErrTooLong { return err } if !sawAMutant { @@ -151,3 +157,40 @@ func checkWorkerLog(r io.Reader) error { } return nil } + +-- mutator_test.go -- +package fuzz_test + +import ( + "strings" + "testing" +) + +// TODO(katiehockman): re-work this test once we have a better fuzzing engine +// (ie. more mutations, and compiler instrumentation) +func Fuzz(f *testing.F) { + // TODO(katiehockman): simplify this once we can dedupe crashes (e.g. + // replace map with calls to panic, and simply count the number of crashes + // that were added to testdata) + crashes := make(map[string]bool) + // No seed corpus initiated + f.Fuzz(func(t *testing.T, b []byte) { + if len(crashes) >= 150 { + panic("mutator found enough edge cases") + } + + if len(b) < 5 { + return // continue + } + + for i := 0; i < 256; i++ { + s := string(byte(i)) + if strings.HasPrefix(string(b), s) { + crashes["pre-" + s] = true + } + if strings.HasSuffix(string(b), s) { + crashes["suffix-" + s] = true + } + } + }) +} \ No newline at end of file diff --git a/src/internal/fuzz/fuzz.go b/src/internal/fuzz/fuzz.go index aacc053682..2a60e73c7f 100644 --- a/src/internal/fuzz/fuzz.go +++ b/src/internal/fuzz/fuzz.go @@ -47,20 +47,13 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD parallel = runtime.GOMAXPROCS(0) } + sharedMemSize := 100 << 20 // 100 MB corpus, err := readCorpusAndCache(seed, corpusDir, cacheDir) if err != nil { return err } - var maxEntryLen int if len(corpus.entries) == 0 { corpus.entries = []corpusEntry{{b: []byte{}}} - maxEntryLen = 0 - } else { - for _, e := range corpus.entries { - if len(e.b) > maxEntryLen { - maxEntryLen = len(e.b) - } - } } // TODO(jayconrod): do we want to support fuzzing different binaries? @@ -78,7 +71,7 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD } newWorker := func() (*worker, error) { - mem, err := sharedMemTempFile(maxEntryLen) + mem, err := sharedMemTempFile(sharedMemSize) if err != nil { return nil, err } diff --git a/src/internal/fuzz/mem.go b/src/internal/fuzz/mem.go index 54e3eb737c..663598bb48 100644 --- a/src/internal/fuzz/mem.go +++ b/src/internal/fuzz/mem.go @@ -47,10 +47,9 @@ func sharedMemSize(valueSize int) int { return int(unsafe.Sizeof(sharedMemHeader{})) + valueSize } -// sharedMemTempFile creates a new temporary file large enough to hold a value -// of the given size, then maps it into memory. The file will be removed when -// the Close method is called. -func sharedMemTempFile(valueSize int) (m *sharedMem, err error) { +// sharedMemTempFile creates a new temporary file of the given size, then maps +// it into memory. The file will be removed when the Close method is called. +func sharedMemTempFile(size int) (m *sharedMem, err error) { // Create a temporary file. f, err := ioutil.TempFile("", "fuzz-*") if err != nil { @@ -64,7 +63,7 @@ func sharedMemTempFile(valueSize int) (m *sharedMem, err error) { }() // Resize it to the correct size. - totalSize := sharedMemSize(valueSize) + totalSize := sharedMemSize(size) if err := f.Truncate(int64(totalSize)); err != nil { return nil, err } diff --git a/src/internal/fuzz/mutator.go b/src/internal/fuzz/mutator.go index 6a52e46f6f..377491adcb 100644 --- a/src/internal/fuzz/mutator.go +++ b/src/internal/fuzz/mutator.go @@ -4,14 +4,244 @@ package fuzz -import "math/rand" +import ( + "encoding/binary" + "reflect" + "unsafe" +) -func mutate(b []byte) { - if len(b) == 0 { - return +type mutator struct { + r *pcgRand +} + +func newMutator() *mutator { + return &mutator{r: newPcgRand()} +} + +func (m *mutator) rand(n int) int { + return m.r.intn(n) +} + +func (m *mutator) randByteOrder() binary.ByteOrder { + if m.r.bool() { + return binary.LittleEndian + } + return binary.BigEndian +} + +// chooseLen chooses length of range mutation in range [0,n]. It gives +// preference to shorter ranges. +func (m *mutator) chooseLen(n int) int { + switch x := m.rand(100); { + case x < 90: + return m.rand(min(8, n)) + 1 + case x < 99: + return m.rand(min(32, n)) + 1 + default: + return m.rand(n) + 1 + } +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// mutate performs several mutations directly onto the provided byte slice. +func (m *mutator) mutate(ptrB *[]byte) { + // TODO(jayconrod,katiehockman): make this use zero allocations + // TODO(katiehockman): pull some of these functions into helper methods + // and test that each case is working as expected. + // TODO(katiehockman): perform more types of mutations. + b := *ptrB + defer func() { + oldHdr := (*reflect.SliceHeader)(unsafe.Pointer(ptrB)) + newHdr := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + if oldHdr.Data != newHdr.Data { + panic("data moved to new address") + } + *ptrB = b + }() + + numIters := 1 + m.r.exp2() + for iter := 0; iter < numIters; iter++ { + switch m.rand(10) { + case 0: + // Remove a range of bytes. + if len(b) <= 1 { + iter-- + continue + } + pos0 := m.rand(len(b)) + pos1 := pos0 + m.chooseLen(len(b)-pos0) + copy(b[pos0:], b[pos1:]) + b = b[:len(b)-(pos1-pos0)] + case 1: + // Insert a range of random bytes. + pos := m.rand(len(b) + 1) + n := m.chooseLen(10) + if len(b)+n >= cap(b) { + iter-- + continue + } + b = b[:len(b)+n] + copy(b[pos+n:], b[pos:]) + for i := 0; i < n; i++ { + b[pos+i] = byte(m.rand(256)) + } + case 2: + // Duplicate a range of bytes. + if len(b) <= 1 { + iter-- + continue + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + n := m.chooseLen(len(b) - src) + tmp := make([]byte, n) + copy(tmp, b[src:]) + b = b[:len(b)+n] + copy(b[dst+n:], b[dst:]) + copy(b[dst:], tmp) + case 3: + // Copy a range of bytes. + if len(b) <= 1 { + iter-- + continue + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + n := m.chooseLen(len(b) - src) + copy(b[dst:], b[src:src+n]) + case 4: + // Bit flip. + if len(b) == 0 { + iter-- + continue + } + pos := m.rand(len(b)) + b[pos] ^= 1 << uint(m.rand(8)) + case 5: + // Set a byte to a random value. + if len(b) == 0 { + iter-- + continue + } + pos := m.rand(len(b)) + b[pos] = byte(m.rand(256)) + case 6: + // Swap 2 bytes. + if len(b) <= 1 { + iter-- + continue + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + b[src], b[dst] = b[dst], b[src] + case 7: + // Add/subtract from a byte. + if len(b) == 0 { + iter-- + continue + } + pos := m.rand(len(b)) + v := byte(m.rand(35) + 1) + if m.r.bool() { + b[pos] += v + } else { + b[pos] -= v + } + case 8: + // Add/subtract from a uint16. + if len(b) < 2 { + iter-- + continue + } + v := uint16(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 1) + enc := m.randByteOrder() + enc.PutUint16(b[pos:], enc.Uint16(b[pos:])+v) + case 9: + // Add/subtract from a uint32. + if len(b) < 4 { + iter-- + continue + } + v := uint32(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 3) + enc := m.randByteOrder() + enc.PutUint32(b[pos:], enc.Uint32(b[pos:])+v) + case 10: + // Add/subtract from a uint64. + if len(b) < 8 { + iter-- + continue + } + v := uint64(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 7) + enc := m.randByteOrder() + enc.PutUint64(b[pos:], enc.Uint64(b[pos:])+v) + case 11: + // Replace a byte with an interesting value. + if len(b) == 0 { + iter-- + continue + } + pos := m.rand(len(b)) + b[pos] = byte(interesting8[m.rand(len(interesting8))]) + case 12: + // Replace a uint16 with an interesting value. + if len(b) < 2 { + iter-- + continue + } + pos := m.rand(len(b) - 1) + v := uint16(interesting16[m.rand(len(interesting16))]) + m.randByteOrder().PutUint16(b[pos:], v) + case 13: + // Replace a uint32 with an interesting value. + if len(b) < 4 { + iter-- + continue + } + pos := m.rand(len(b) - 3) + v := uint32(interesting32[m.rand(len(interesting32))]) + m.randByteOrder().PutUint32(b[pos:], v) + } } +} + +var ( + interesting8 = []int8{-128, -1, 0, 1, 16, 32, 64, 100, 127} + interesting16 = []int16{-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767} + interesting32 = []int32{-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647} +) - // Mutate a byte in a random position. - pos := rand.Intn(len(b)) - b[pos] = byte(rand.Intn(256)) +func init() { + for _, v := range interesting8 { + interesting16 = append(interesting16, int16(v)) + } + for _, v := range interesting16 { + interesting32 = append(interesting32, int32(v)) + } } diff --git a/src/internal/fuzz/pcg.go b/src/internal/fuzz/pcg.go new file mode 100644 index 0000000000..5f0c1c39f6 --- /dev/null +++ b/src/internal/fuzz/pcg.go @@ -0,0 +1,104 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "math/bits" + "sync/atomic" + "time" +) + +// The functions in pcg implement a 32 bit PRNG with a 64 bit period: pcg xsh rr +// 64 32. See https://www.pcg-random.org/ for more information. This +// implementation is geared specifically towards the needs of fuzzing: Simple +// creation and use, no reproducibility, no concurrency safety, just the +// necessary methods, optimized for speed. + +var globalInc uint64 // PCG stream + +const multiplier uint64 = 6364136223846793005 + +// pcgRand is a PRNG. It should not be copied or shared. No Rand methods are +// concurrency safe. +type pcgRand struct { + noCopy noCopy // help avoid mistakes: ask vet to ensure that we don't make a copy + state uint64 + inc uint64 +} + +// newPcgRand generates a new, seeded Rand, ready for use. +func newPcgRand() *pcgRand { + r := new(pcgRand) + now := uint64(time.Now().UnixNano()) + inc := atomic.AddUint64(&globalInc, 1) + r.state = now + r.inc = (inc << 1) | 1 + r.step() + r.state += now + r.step() + return r +} + +func (r *pcgRand) step() { + r.state *= multiplier + r.state += r.inc +} + +// uint32 returns a pseudo-random uint32. +func (r *pcgRand) uint32() uint32 { + x := r.state + r.step() + return bits.RotateLeft32(uint32(((x>>18)^x)>>27), -int(x>>59)) +} + +// intn returns a pseudo-random number in [0, n). +// n must fit in a uint32. +func (r *pcgRand) intn(n int) int { + if int(uint32(n)) != n { + panic("large Intn") + } + return int(r.uint32n(uint32(n))) +} + +// uint32n returns a pseudo-random number in [0, n). +// +// For implementation details, see: +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction +// https://lemire.me/blog/2016/06/30/fast-random-shuffling +func (r *pcgRand) uint32n(n uint32) uint32 { + v := r.uint32() + prod := uint64(v) * uint64(n) + low := uint32(prod) + if low < n { + thresh := uint32(-int32(n)) % n + for low < thresh { + v = r.uint32() + prod = uint64(v) * uint64(n) + low = uint32(prod) + } + } + return uint32(prod >> 32) +} + +// exp2 generates n with probability 1/2^(n+1). +func (r *pcgRand) exp2() int { + return bits.TrailingZeros32(r.uint32()) +} + +// bool generates a random bool. +func (r *pcgRand) bool() bool { + return r.uint32()&1 == 0 +} + +// noCopy may be embedded into structs which must not be copied +// after the first use. +// +// See https://golang.org/issues/8005#issuecomment-190753527 +// for details. +type noCopy struct{} + +// lock is a no-op used by -copylocks checker from `go vet`. +func (*noCopy) lock() {} +func (*noCopy) unlock() {} diff --git a/src/internal/fuzz/worker.go b/src/internal/fuzz/worker.go index ef2a9303ef..8947641996 100644 --- a/src/internal/fuzz/worker.go +++ b/src/internal/fuzz/worker.go @@ -318,7 +318,7 @@ func RunFuzzWorker(ctx context.Context, fn func([]byte) error) error { if err != nil { return err } - srv := &workerServer{workerComm: comm, fuzzFn: fn} + srv := &workerServer{workerComm: comm, fuzzFn: fn, m: newMutator()} return srv.serve(ctx) } @@ -366,6 +366,7 @@ type workerComm struct { // memory after a worker process terminates unexpectedly. type workerServer struct { workerComm + m *mutator // fuzzFn runs the worker's fuzz function on the given input and returns // an error if it finds a crasher (the process may also exit or crash). @@ -441,7 +442,11 @@ func (ws *workerServer) fuzz(ctx context.Context, args fuzzArgs) fuzzResponse { // real heuristic once we have one. return fuzzResponse{Interesting: true} default: - mutate(ws.mem.valueRef()) + b := ws.mem.valueRef() + ws.m.mutate(&b) + // TODO(jayconrod): consider making ws.m.header() contain the whole + // slice header, so the length can be updated when the slice changes + ws.mem.header().length = len(b) if err := ws.fuzzFn(ws.mem.valueRef()); err != nil { return fuzzResponse{Err: err.Error()} } -- 2.48.1