]> Cypherpunks repositories - gostls13.git/commitdiff
internal/fuzz: centralize corpus entry addition
authorRoland Shoemaker <roland@golang.org>
Mon, 1 Nov 2021 17:03:36 +0000 (10:03 -0700)
committerRoland Shoemaker <roland@golang.org>
Thu, 27 Jan 2022 16:07:55 +0000 (16:07 +0000)
Adds an addCorpusEntry method to coordinator which manages checking for
duplicate entries, writing entries to the cache directory, and adding
entries to the corpus. Also moves readCache to be a method on the
coordinator.

Fixes #50606

Change-Id: Id6721384a2ad1cfb4c5471cf0cd0a7510d250a6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/360394
Trust: Katie Hockman <katie@golang.org>
Reviewed-by: Katie Hockman <katie@golang.org>
Trust: Roland Shoemaker <roland@golang.org>
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/internal/fuzz/fuzz.go

index 37b6d2b391e6bfef99781021193dbc3451260336..73f32dd4c73107a93798cb30acb68e1952a4f577 100644 (file)
@@ -316,32 +316,15 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
                                        } else {
                                                // Update the coordinator's coverage mask and save the value.
                                                inputSize := len(result.entry.Data)
-                                               if opts.CacheDir != "" {
-                                                       // It is possible that the input that was discovered is already
-                                                       // present in the corpus, but the worker produced a coverage map
-                                                       // that still expanded our total coverage (this may happen due to
-                                                       // flakiness in the coverage counters). In order to prevent adding
-                                                       // duplicate entries to the corpus (and re-writing the file on
-                                                       // disk), skip it if the on disk file already exists.
-                                                       // TODO(roland): this check is limited in that it will only be
-                                                       // applied if we are using the CacheDir. Another option would be
-                                                       // to iterate through the corpus and check if it is already present,
-                                                       // which would catch cases where we are not caching entries.
-                                                       // A slightly faster approach would be to keep some kind of map of
-                                                       // entry hashes, which would allow us to avoid iterating through
-                                                       // all entries.
-                                                       _, err = os.Stat(result.entry.Path)
-                                                       if err == nil {
-                                                               continue
-                                                       }
-                                                       err := writeToCorpus(&result.entry, opts.CacheDir)
-                                                       if err != nil {
-                                                               stop(err)
-                                                       }
-                                                       result.entry.Data = nil
+                                               duplicate, err := c.addCorpusEntries(true, result.entry)
+                                               if err != nil {
+                                                       stop(err)
+                                                       break
+                                               }
+                                               if duplicate {
+                                                       continue
                                                }
                                                c.updateCoverage(keepCoverage)
-                                               c.corpus.entries = append(c.corpus.entries, result.entry)
                                                c.inputQueue.enqueue(result.entry)
                                                c.interestingCount++
                                                if shouldPrintDebugInfo() {
@@ -433,6 +416,28 @@ func (e *crashError) CrashPath() string {
 
 type corpus struct {
        entries []CorpusEntry
+       hashes  map[[sha256.Size]byte]bool
+}
+
+func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) {
+       for _, e := range entries {
+               h := sha256.Sum256(e.Data)
+               if c.corpus.hashes[h] {
+                       return true, nil
+               }
+               if addToCache {
+                       if err := writeToCorpus(&e, c.opts.CacheDir); err != nil {
+                               return false, err
+                       }
+                       // For entries written to disk, we don't hold onto the bytes,
+                       // since the corpus would consume a significant amount of
+                       // memory.
+                       e.Data = nil
+               }
+               c.corpus.hashes[h] = true
+               c.corpus.entries = append(c.corpus.entries, e)
+       }
+       return false, nil
 }
 
 // CorpusEntry represents an individual input for fuzzing.
@@ -640,18 +645,17 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
                        opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
                }
        }
-       corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir)
-       if err != nil {
-               return nil, err
-       }
        c := &coordinator{
                opts:        opts,
                startTime:   time.Now(),
                inputC:      make(chan fuzzInput),
                minimizeC:   make(chan fuzzMinimizeInput),
                resultC:     make(chan fuzzResult),
-               corpus:      corpus,
                timeLastLog: time.Now(),
+               corpus:      corpus{hashes: make(map[[sha256.Size]byte]bool)},
+       }
+       if err := c.readCache(); err != nil {
+               return nil, err
        }
        if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 {
                for _, t := range opts.Types {
@@ -691,7 +695,7 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
                data := marshalCorpusFile(vals...)
                h := sha256.Sum256(data)
                name := fmt.Sprintf("%x", h[:4])
-               c.corpus.entries = append(c.corpus.entries, CorpusEntry{Path: name, Data: data})
+               c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data})
        }
 
        return c, nil
@@ -908,22 +912,25 @@ func (c *coordinator) elapsed() time.Duration {
 //
 // TODO(fuzzing): need a mechanism that can remove values that
 // aren't useful anymore, for example, because they have the wrong type.
-func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) {
-       var c corpus
-       c.entries = append(c.entries, seed...)
-       entries, err := ReadCorpus(cacheDir, types)
+func (c *coordinator) readCache() error {
+       if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil {
+               return err
+       }
+       entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types)
        if err != nil {
                if _, ok := err.(*MalformedCorpusError); !ok {
                        // It's okay if some files in the cache directory are malformed and
                        // are not included in the corpus, but fail if it's an I/O error.
-                       return corpus{}, err
+                       return err
                }
                // TODO(jayconrod,katiehockman): consider printing some kind of warning
                // indicating the number of files which were skipped because they are
                // malformed.
        }
-       c.entries = append(c.entries, entries...)
-       return c, nil
+       if _, err := c.addCorpusEntries(false, entries...); err != nil {
+               return err
+       }
+       return nil
 }
 
 // MalformedCorpusError is an error found while reading the corpus from the