Cypherpunks repositories - gostls13.git/commitdiff
[dev.fuzz] internal/fuzz: don't store corpus in memory
author Roland Shoemaker <roland@golang.org>
Wed, 25 Aug 2021 20:31:19 +0000 (13:31 -0700)
committer Roland Shoemaker <roland@golang.org>
Thu, 2 Sep 2021 21:43:52 +0000 (21:43 +0000)
Instead of holding all corpus data/values in memory, only store seed
inputs added via F.Add in memory, and load corpus entries that are
stored on disk only when we need them. This should significantly reduce
the memory required by the coordinator process.

Additionally only load the corpus in the coordinator process, since the
worker has no need for it.

Fixes #46669.

Change-Id: Ic3b0c5e929fdb3e2877b963e6b0fa14e140c1e1d
Reviewed-on: https://go-review.googlesource.com/c/go/+/345096
Trust: Roland Shoemaker <roland@golang.org>
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Jay Conrod <jayconrod@google.com>
src/internal/fuzz/fuzz.go
src/internal/fuzz/worker.go
src/testing/fuzz.go

index 7213a08d506cd3a6f725cc8a9fc300423206eebb..722933a0bf483f259226cf6226a7ede8200633f1 100644 (file)
@@ -254,7 +254,7 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
                                                        c.opts.Log,
                                                        "DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n",
                                                        time.Since(c.startTime),
-                                                       result.entry.Name,
+                                                       fileName,
                                                        result.entry.Parent,
                                                        result.entry.Generation,
                                                        len(result.entry.Data),
@@ -303,35 +303,39 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
                                        // number of new edges that this result expanded.
                                        // TODO(jayconrod, katiehockman): Don't write a value that's already
                                        // in the corpus.
-                                       if printDebugInfo() {
-                                               fmt.Fprintf(
-                                                       c.opts.Log,
-                                                       "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
-                                                       time.Since(c.startTime),
-                                                       result.entry.Name,
-                                                       result.entry.Parent,
-                                                       result.entry.Generation,
-                                                       countBits(keepCoverage),
-                                                       countBits(c.coverageMask),
-                                                       len(result.entry.Data),
-                                                       result.entryDuration,
-                                               )
-                                       }
                                        if !result.minimizeAttempted && crashMinimizing == nil && c.canMinimize() {
                                                // Send back to workers to find a smaller value that preserves
                                                // at least one new coverage bit.
                                                c.queueForMinimization(result, keepCoverage)
                                        } else {
                                                // Update the coordinator's coverage mask and save the value.
+                                               inputSize := len(result.entry.Data)
                                                if opts.CacheDir != "" {
-                                                       if _, err := writeToCorpus(result.entry.Data, opts.CacheDir); err != nil {
+                                                       filename, err := writeToCorpus(result.entry.Data, opts.CacheDir)
+                                                       if err != nil {
                                                                stop(err)
                                                        }
+                                                       result.entry.Data = nil
+                                                       result.entry.Name = filename
                                                }
                                                c.updateCoverage(keepCoverage)
                                                c.corpus.entries = append(c.corpus.entries, result.entry)
                                                c.inputQueue.enqueue(result.entry)
                                                c.interestingCount++
+                                               if printDebugInfo() {
+                                                       fmt.Fprintf(
+                                                               c.opts.Log,
+                                                               "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
+                                                               time.Since(c.startTime),
+                                                               result.entry.Name,
+                                                               result.entry.Parent,
+                                                               result.entry.Generation,
+                                                               countBits(keepCoverage),
+                                                               countBits(c.coverageMask),
+                                                               inputSize,
+                                                               result.entryDuration,
+                                                       )
+                                               }
                                        }
                                } else {
                                        if printDebugInfo() {
@@ -393,18 +397,17 @@ type corpus struct {
 // packages, but testing can't import this package directly, and we don't want
 // to export this type from testing. Instead, we use the same struct type and
 // use a type alias (not a defined type) for convenience.
-//
-// TODO: split marshalled and unmarshalled types. In most places, we only need
-// one or the other.
 type CorpusEntry = struct {
        Parent string
 
        // Name is the name of the corpus file, if the entry was loaded from the
        // seed corpus. It can be used with -run. For entries added with f.Add and
-       // entries generated by the mutator, Name is empty.
+       // entries generated by the mutator, Name is empty and Data is populated.
        Name string
 
-       // Data is the raw data loaded from a corpus file.
+       // Data is the raw input data. Data should only be populated for initial
+       // seed values added with f.Add. For on-disk corpus files, Data will
+       // be nil.
        Data []byte
 
        // Values is the unmarshaled values from a corpus file.
@@ -413,6 +416,16 @@ type CorpusEntry = struct {
        Generation int
 }
 
+// CorpusEntryData returns the raw input bytes of ce: the in-memory Data field
+// if it is non-nil, otherwise the contents of the file named by ce.Name.
+func CorpusEntryData(ce CorpusEntry) ([]byte, error) {
+       if ce.Data != nil {
+               return ce.Data, nil
+       }
+
+       return os.ReadFile(ce.Name)
+}
+
 type fuzzInput struct {
        // entry is the value to test initially. The worker will randomly mutate
        // values from this starting point.
@@ -580,7 +593,7 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
                data := marshalCorpusFile(vals...)
                h := sha256.Sum256(data)
                name := fmt.Sprintf("%x", h[:4])
-               corpus.entries = append(corpus.entries, CorpusEntry{Name: name, Data: data, Values: vals})
+               corpus.entries = append(corpus.entries, CorpusEntry{Name: name, Data: data})
        }
        c := &coordinator{
                opts:      opts,
@@ -875,7 +888,7 @@ func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
                        errs = append(errs, fmt.Errorf("%q: %v", filename, err))
                        continue
                }
-               corpus = append(corpus, CorpusEntry{Name: filename, Data: data, Values: vals})
+               corpus = append(corpus, CorpusEntry{Name: filename, Values: vals})
        }
        if len(errs) > 0 {
                return corpus, &MalformedCorpusError{errs: errs}
index 67ec3c762ba9a7167fc462c245abafb2fc74ac90..2f5704094ea65611a6861c707d941fb915f9224f 100644 (file)
@@ -1002,7 +1002,11 @@ func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args
                return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
        }
        mem.header().count = 0
-       mem.setValue(entryIn.Data)
+       inp, err := CorpusEntryData(entryIn)
+       if err != nil {
+               return CorpusEntry{}, minimizeResponse{}, err
+       }
+       mem.setValue(inp)
        wc.memMu <- mem
        defer func() { wc.memMu <- mem }()
 
@@ -1013,10 +1017,6 @@ func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args
                return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
        }
        entryOut.Data = mem.valueCopy()
-       entryOut.Values, err = unmarshalCorpusFile(entryOut.Data)
-       if err != nil {
-               panic(fmt.Sprintf("workerClient.minimize unmarshaling minimized value: %v", err))
-       }
        resp.Count = mem.header().count
 
        return entryOut, resp, callErr
@@ -1032,7 +1032,11 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
                return CorpusEntry{}, fuzzResponse{}, errSharedMemClosed
        }
        mem.header().count = 0
-       mem.setValue(entryIn.Data)
+       inp, err := CorpusEntryData(entryIn)
+       if err != nil {
+               return CorpusEntry{}, fuzzResponse{}, err
+       }
+       mem.setValue(inp)
        wc.memMu <- mem
 
        c := call{Fuzz: &args}
@@ -1044,10 +1048,10 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
        defer func() { wc.memMu <- mem }()
        resp.Count = mem.header().count
 
-       if !bytes.Equal(entryIn.Data, mem.valueRef()) {
+       if !bytes.Equal(inp, mem.valueRef()) {
                panic("workerServer.fuzz modified input")
        }
-       valuesOut, err := unmarshalCorpusFile(entryIn.Data)
+       valuesOut, err := unmarshalCorpusFile(inp)
        if err != nil {
                panic(fmt.Sprintf("unmarshaling fuzz input value after call: %v", err))
        }
@@ -1063,7 +1067,6 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
                Name:       name,
                Parent:     entryIn.Name,
                Data:       dataOut,
-               Values:     valuesOut,
                Generation: entryIn.Generation + 1,
        }
 
index 4892d3f3e9ee267fb7fef62027fc7a15c2088b9a..e567f7d9f4a80c2762e13aca41cc8fba3633c395 100644 (file)
@@ -305,21 +305,33 @@ func (f *F) Fuzz(ff interface{}) {
                types = append(types, t)
        }
 
-       // Check the corpus provided by f.Add
-       for _, c := range f.corpus {
-               if err := f.fuzzContext.checkCorpus(c.Values, types); err != nil {
-                       // TODO: Is there a way to save which line number is associated
-                       // with the f.Add call that failed?
+       // Only load the corpus if we need it
+       if f.fuzzContext.runFuzzWorker == nil {
+               // Check the corpus provided by f.Add
+               for _, c := range f.corpus {
+                       if err := f.fuzzContext.checkCorpus(c.Values, types); err != nil {
+                               // TODO: Is there a way to save which line number is associated
+                               // with the f.Add call that failed?
+                               f.Fatal(err)
+                       }
+               }
+
+               // Load seed corpus
+               c, err := f.fuzzContext.readCorpus(filepath.Join(corpusDir, f.name), types)
+               if err != nil {
                        f.Fatal(err)
                }
-       }
 
-       // Load seed corpus
-       c, err := f.fuzzContext.readCorpus(filepath.Join(corpusDir, f.name), types)
-       if err != nil {
-               f.Fatal(err)
+               // If this is the coordinator process, zero the values, since we don't need to hold
+               // onto them.
+               if f.fuzzContext.coordinateFuzzing != nil {
+                       for i := range c {
+                               c[i].Values = nil
+                       }
+               }
+
+               f.corpus = append(f.corpus, c...)
        }
-       f.corpus = append(f.corpus, c...)
 
        // run calls fn on a given input, as a subtest with its own T.
        // run is analogous to T.Run. The test filtering and cleanup works similarly.