From fde4cc2a3189e2c964a0ce49de3cbe79ebedf985 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Tue, 5 Oct 2021 10:35:31 -0400 Subject: [PATCH] testing: reduce memory used by subtest names MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is heavily based on CL 341336 by Joe Tsai and CL 351452 by Jay Conrod. T.Run and T.Name use a map[string]int64 to hold the next suffix to use when duplicate names are passed to T.Run. This map necessarily retains one entry per unique name. However, it's a waste of memory to retain one entry per duplicate name: when we encounter the Nth duplicate, we know that names 00 through N-1 have been used just by looking at N. We do still need to store (and check for collisions againsts) explicit names provided by the caller. For example, if the user passes in "a", then "a#01", then "a" again, we cannot deduplicate the second "a" to "a#01" — we need to instead skip ahead to "a#02". We can do so by checking the count of "a", then generating a proposed deduplicated name, then double-checking that proposed name against only the explicit names so far. This somewhat reduces memory usage for tests that spawn large numbers of duplicate subtests, but doesn't solve the problem of memory growth for fuzzing — we still have to track all of the explicit, user-provided subtest names, and in a long-running fuzz test that set alone may be unbounded. This fixes memory growth for the example described in https://golang.org/issue/44517#issuecomment-897104060, but not the one in https://golang.org/issue/44517#issuecomment-933825661. For #44517 Change-Id: Ia159ecfcf44561ba67508d3af6377c27856df31d Reviewed-on: https://go-review.googlesource.com/c/go/+/354749 Trust: Bryan C. Mills Run-TryBot: Bryan C. Mills TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Reviewed-by: Emmanuel Odeke Reviewed-by: Jay Conrod --- src/testing/match.go | 78 +++++++++++++++++++++++------ src/testing/match_test.go | 101 +++++++++++++++++++++++++------------- 2 files changed, 131 insertions(+), 48 deletions(-) diff --git a/src/testing/match.go b/src/testing/match.go index c6ff429fe4..d530f70c26 100644 --- a/src/testing/match.go +++ b/src/testing/match.go @@ -17,8 +17,13 @@ type matcher struct { filter filterMatch matchFunc func(pat, str string) (bool, error) - mu sync.Mutex - subNames map[string]int64 + mu sync.Mutex + + // subNames is used to deduplicate subtest names. + // Each key is the subtest name joined to the deduplicated name of the parent test. + // Each value is the count of the number of occurrences of the given subtest name + // already seen. + subNames map[string]int32 } type filterMatch interface { @@ -54,7 +59,7 @@ func newMatcher(matchString func(pat, str string) (bool, error), patterns, name return &matcher{ filter: impl, matchFunc: matchString, - subNames: map[string]int64{}, + subNames: map[string]int32{}, } } @@ -189,22 +194,65 @@ func splitRegexp(s string) filterMatch { // unique creates a unique name for the given parent and subname by affixing it // with one or more counts, if necessary. func (m *matcher) unique(parent, subname string) string { - name := fmt.Sprintf("%s/%s", parent, subname) - empty := subname == "" + base := parent + "/" + subname + for { - next, exists := m.subNames[name] - if !empty && !exists { - m.subNames[name] = 1 // next count is 1 - return name + n := m.subNames[base] + if n < 0 { + panic("subtest count overflow") } - // Name was already used. We increment with the count and append a - // string with the count. - m.subNames[name] = next + 1 + m.subNames[base] = n + 1 + + if n == 0 && subname != "" { + prefix, nn := parseSubtestNumber(base) + if len(prefix) < len(base) && nn < m.subNames[prefix] { + // This test is explicitly named like "parent/subname#NN", + // and #NN was already used for the NNth occurrence of "parent/subname". + // Loop to add a disambiguating suffix. + continue + } + return base + } + + name := fmt.Sprintf("%s#%02d", base, n) + if m.subNames[name] != 0 { + // This is the nth occurrence of base, but the name "parent/subname#NN" + // collides with the first occurrence of a subtest *explicitly* named + // "parent/subname#NN". Try the next number. + continue + } + + return name + } +} + +// parseSubtestNumber splits a subtest name into a "#%02d"-formatted int32 +// suffix (if present), and a prefix preceding that suffix (always). +func parseSubtestNumber(s string) (prefix string, nn int32) { + i := strings.LastIndex(s, "#") + if i < 0 { + return s, 0 + } + + prefix, suffix := s[:i], s[i+1:] + if len(suffix) < 2 || (len(suffix) > 2 && suffix[0] == '0') { + // Even if suffix is numeric, it is not a possible output of a "%02" format + // string: it has either too few digits or too many leading zeroes. + return s, 0 + } + if suffix == "00" { + if !strings.HasSuffix(prefix, "/") { + // We only use "#00" as a suffix for subtests named with the empty + // string — it isn't a valid suffix if the subtest name is non-empty. + return s, 0 + } + } - // Add a count to guarantee uniqueness. - name = fmt.Sprintf("%s#%02d", name, next) - empty = false + n, err := strconv.ParseInt(suffix, 10, 32) + if err != nil || n < 0 { + return s, 0 } + return prefix, int32(n) } // rewrite rewrites a subname to having only printable characters and no white diff --git a/src/testing/match_test.go b/src/testing/match_test.go index 9ceadbb31d..206ac0b651 100644 --- a/src/testing/match_test.go +++ b/src/testing/match_test.go @@ -149,49 +149,84 @@ func TestMatcher(t *T) { } } +var namingTestCases = []struct{ name, want string }{ + // Uniqueness + {"", "x/#00"}, + {"", "x/#01"}, + {"#0", "x/#0"}, // Doesn't conflict with #00 because the number of digits differs. + {"#00", "x/#00#01"}, // Conflicts with implicit #00 (used above), so add a suffix. + {"#", "x/#"}, + {"#", "x/##01"}, + + {"t", "x/t"}, + {"t", "x/t#01"}, + {"t", "x/t#02"}, + {"t#00", "x/t#00"}, // Explicit "#00" doesn't conflict with the unsuffixed first subtest. + + {"a#01", "x/a#01"}, // user has subtest with this name. + {"a", "x/a"}, // doesn't conflict with this name. + {"a", "x/a#02"}, // This string is claimed now, so resume + {"a", "x/a#03"}, // with counting. + {"a#02", "x/a#02#01"}, // We already used a#02 once, so add a suffix. + + {"b#00", "x/b#00"}, + {"b", "x/b"}, // Implicit 0 doesn't conflict with explicit "#00". + {"b", "x/b#01"}, + {"b#9223372036854775807", "x/b#9223372036854775807"}, // MaxInt64 + {"b", "x/b#02"}, + {"b", "x/b#03"}, + + // Sanitizing + {"A:1 B:2", "x/A:1_B:2"}, + {"s\t\r\u00a0", "x/s___"}, + {"\x01", `x/\x01`}, + {"\U0010ffff", `x/\U0010ffff`}, +} + func TestNaming(t *T) { m := newMatcher(regexp.MatchString, "", "") - parent := &common{name: "x", level: 1} // top-level test. - // Rig the matcher with some preloaded values. - m.subNames["x/b"] = 1000 - - testCases := []struct { - name, want string - }{ - // Uniqueness - {"", "x/#00"}, - {"", "x/#01"}, - - {"t", "x/t"}, - {"t", "x/t#01"}, - {"t", "x/t#02"}, - - {"a#01", "x/a#01"}, // user has subtest with this name. - {"a", "x/a"}, // doesn't conflict with this name. - {"a", "x/a#01#01"}, // conflict, add disambiguating string. - {"a", "x/a#02"}, // This string is claimed now, so resume - {"a", "x/a#03"}, // with counting. - {"a#02", "x/a#02#01"}, - - {"b", "x/b#1000"}, // rigged, see above - {"b", "x/b#1001"}, - - // // Sanitizing - {"A:1 B:2", "x/A:1_B:2"}, - {"s\t\r\u00a0", "x/s___"}, - {"\x01", `x/\x01`}, - {"\U0010ffff", `x/\U0010ffff`}, - } - - for i, tc := range testCases { + for i, tc := range namingTestCases { if got, _, _ := m.fullName(parent, tc.name); got != tc.want { t.Errorf("%d:%s: got %q; want %q", i, tc.name, got, tc.want) } } } +func FuzzNaming(f *F) { + for _, tc := range namingTestCases { + f.Add(tc.name) + } + parent := &common{name: "x", level: 1} + var m *matcher + var seen map[string]string + reset := func() { + m = newMatcher(regexp.MatchString, "", "") + seen = make(map[string]string) + } + reset() + + f.Fuzz(func(t *T, subname string) { + if len(subname) > 10 { + // Long names attract the OOM killer. + t.Skip() + } + name := m.unique(parent.name, subname) + if !strings.Contains(name, "/"+subname) { + t.Errorf("name %q does not contain subname %q", name, subname) + } + if prev, ok := seen[name]; ok { + t.Errorf("name %q generated by both %q and %q", name, prev, subname) + } + if len(seen) > 1e6 { + // Free up memory. + reset() + } + seen[name] = subname + }) +} + // GoString returns a string that is more readable than the default, which makes // it easier to read test errors. func (m alternationMatch) GoString() string { -- 2.50.0