From: Julien Cretel
Date: Mon, 25 Aug 2025 20:38:20 +0000 (+0000)
Subject: bytes, strings: speed up TrimSpace
X-Git-Tag: go1.26rc1~997
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=89d41d254a;p=gostls13.git

bytes, strings: speed up TrimSpace

This change lifts bounds checks out of loops in the TrimSpace functions,
among other micro-optimizations. Here are some benchmark results
(no change to allocations):

goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.406n ± 0%   3.829n ± 1%  -13.11% (p=0.000 n=20)
TrimSpace/ASCII-8          7.688n ± 1%   5.872n ± 1%  -23.61% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   82.25n ± 1%   81.00n ± 1%   -1.51% (p=0.001 n=20)
TrimSpace/JustNonASCII-8   131.6n ± 8%   132.2n ± 1%        ~ (p=0.899 n=20)
geomean                    24.61n        22.15n        -9.99%

pkg: strings
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.178n ± 0%   3.857n ± 2%   -7.68% (p=0.001 n=20)
TrimSpace/ASCII-8          7.708n ± 0%   5.585n ± 1%  -27.55% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   98.70n ± 1%   88.54n ± 1%  -10.30% (p=0.000 n=20)
TrimSpace/JustNonASCII-8   132.8n ± 2%   123.2n ± 0%   -7.16% (p=0.000 n=20)
geomean                    25.49n        22.02n       -13.61%

Change-Id: I523f03a909c82a51940b44c7b2634985b7447982
GitHub-Last-Rev: 35163f04c63ce2ef5e9e831c4371750504edb892
GitHub-Pull-Request: golang/go#75127
Reviewed-on: https://go-review.googlesource.com/c/go/+/698735
Reviewed-by: Sean Liao
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Cherry Mui
Auto-Submit: Sean Liao
Reviewed-by: Keith Randall
---

diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index a0a8fa0b29..ce2e004910 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -1117,41 +1117,34 @@ func trimRightUnicode(s []byte, cutset string) []byte {
 // TrimSpace returns a subslice of s by slicing off all leading and
 // trailing white space, as defined by Unicode.
 func TrimSpace(s []byte) []byte {
-	// Fast path for ASCII: look for the first ASCII non-space byte
-	start := 0
-	for ; start < len(s); start++ {
-		c := s[start]
+	// Fast path for ASCII: look for the first ASCII non-space byte.
+	for lo, c := range s {
 		if c >= utf8.RuneSelf {
 			// If we run into a non-ASCII byte, fall back to the
-			// slower unicode-aware method on the remaining bytes
-			return TrimFunc(s[start:], unicode.IsSpace)
-		}
-		if asciiSpace[c] == 0 {
-			break
+			// slower unicode-aware method on the remaining bytes.
+			return TrimFunc(s[lo:], unicode.IsSpace)
 		}
-	}
-
-	// Now look for the first ASCII non-space byte from the end
-	stop := len(s)
-	for ; stop > start; stop-- {
-		c := s[stop-1]
-		if c >= utf8.RuneSelf {
-			return TrimFunc(s[start:stop], unicode.IsSpace)
+		if asciiSpace[c] != 0 {
+			continue
 		}
-		if asciiSpace[c] == 0 {
-			break
+		s = s[lo:]
+		// Now look for the first ASCII non-space byte from the end.
+		for hi := len(s) - 1; hi >= 0; hi-- {
+			c := s[hi]
+			if c >= utf8.RuneSelf {
+				return TrimFunc(s[:hi+1], unicode.IsSpace)
+			}
+			if asciiSpace[c] == 0 {
+				// At this point, s[:hi+1] starts and ends with ASCII
+				// non-space bytes, so we're done. Non-ASCII cases have
+				// already been handled above.
+				return s[:hi+1]
+			}
 		}
 	}
-
-	// At this point s[start:stop] starts and ends with an ASCII
-	// non-space bytes, so we're done. Non-ASCII cases have already
-	// been handled above.
-	if start == stop {
-		// Special case to preserve previous TrimLeftFunc behavior,
-		// returning nil instead of empty slice if all spaces.
-		return nil
-	}
-	return s[start:stop]
+	// Special case to preserve previous TrimLeftFunc behavior,
+	// returning nil instead of empty slice if all spaces.
+	return nil
 }
 
 // Runes interprets s as a sequence of UTF-8-encoded code points.
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 91c6ddef66..74007977d9 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -1091,37 +1091,32 @@ func trimRightUnicode(s, cutset string) string {
 // TrimSpace returns a slice of the string s, with all leading
 // and trailing white space removed, as defined by Unicode.
 func TrimSpace(s string) string {
-	// Fast path for ASCII: look for the first ASCII non-space byte
-	start := 0
-	for ; start < len(s); start++ {
-		c := s[start]
+	// Fast path for ASCII: look for the first ASCII non-space byte.
+	for lo, c := range []byte(s) {
 		if c >= utf8.RuneSelf {
 			// If we run into a non-ASCII byte, fall back to the
-			// slower unicode-aware method on the remaining bytes
-			return TrimFunc(s[start:], unicode.IsSpace)
-		}
-		if asciiSpace[c] == 0 {
-			break
+			// slower unicode-aware method on the remaining bytes.
+			return TrimFunc(s[lo:], unicode.IsSpace)
 		}
-	}
-
-	// Now look for the first ASCII non-space byte from the end
-	stop := len(s)
-	for ; stop > start; stop-- {
-		c := s[stop-1]
-		if c >= utf8.RuneSelf {
-			// start has been already trimmed above, should trim end only
-			return TrimRightFunc(s[start:stop], unicode.IsSpace)
+		if asciiSpace[c] != 0 {
+			continue
 		}
-		if asciiSpace[c] == 0 {
-			break
+		s = s[lo:]
+		// Now look for the first ASCII non-space byte from the end.
+		for hi := len(s) - 1; hi >= 0; hi-- {
+			c := s[hi]
+			if c >= utf8.RuneSelf {
+				return TrimRightFunc(s[:hi+1], unicode.IsSpace)
+			}
+			if asciiSpace[c] == 0 {
+				// At this point, s[:hi+1] starts and ends with ASCII
+				// non-space bytes, so we're done. Non-ASCII cases have
+				// already been handled above.
+				return s[:hi+1]
+			}
 		}
 	}
-
-	// At this point s[start:stop] starts and ends with an ASCII
-	// non-space bytes, so we're done. Non-ASCII cases have already
-	// been handled above.
-	return s[start:stop]
+	return ""
 }
 
 // TrimPrefix returns s without the provided leading prefix string.