From: Julien Cretel <jub0bsinthecloud@gmail.com>
Date: Mon, 25 Aug 2025 20:38:20 +0000 (+0000)
Subject: bytes, strings: speed up TrimSpace
X-Git-Tag: go1.26rc1~997
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=89d41d254a;p=gostls13.git

bytes, strings: speed up TrimSpace

This change lifts bounds checks out of loops in the TrimSpace functions,
among other micro-optimizations. Here are some benchmark results
(no change to allocations):

goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.406n ± 0%   3.829n ± 1%  -13.11% (p=0.000 n=20)
TrimSpace/ASCII-8          7.688n ± 1%   5.872n ± 1%  -23.61% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   82.25n ± 1%   81.00n ± 1%   -1.51% (p=0.001 n=20)
TrimSpace/JustNonASCII-8   131.6n ± 8%   132.2n ± 1%        ~ (p=0.899 n=20)
geomean                    24.61n        22.15n        -9.99%

pkg: strings
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.178n ± 0%   3.857n ± 2%   -7.68% (p=0.001 n=20)
TrimSpace/ASCII-8          7.708n ± 0%   5.585n ± 1%  -27.55% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   98.70n ± 1%   88.54n ± 1%  -10.30% (p=0.000 n=20)
TrimSpace/JustNonASCII-8   132.8n ± 2%   123.2n ± 0%   -7.16% (p=0.000 n=20)
geomean                    25.49n        22.02n       -13.61%

Change-Id: I523f03a909c82a51940b44c7b2634985b7447982
GitHub-Last-Rev: 35163f04c63ce2ef5e9e831c4371750504edb892
GitHub-Pull-Request: golang/go#75127
Reviewed-on: https://go-review.googlesource.com/c/go/+/698735
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Sean Liao <sean@liao.dev>
Reviewed-by: Keith Randall <khr@google.com>
---

diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index a0a8fa0b29..ce2e004910 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -1117,41 +1117,34 @@ func trimRightUnicode(s []byte, cutset string) []byte {
 // TrimSpace returns a subslice of s by slicing off all leading and
 // trailing white space, as defined by Unicode.
 func TrimSpace(s []byte) []byte {
-	// Fast path for ASCII: look for the first ASCII non-space byte
-	start := 0
-	for ; start < len(s); start++ {
-		c := s[start]
+	// Fast path for ASCII: look for the first ASCII non-space byte.
+	for lo, c := range s {
 		if c >= utf8.RuneSelf {
 			// If we run into a non-ASCII byte, fall back to the
-			// slower unicode-aware method on the remaining bytes
-			return TrimFunc(s[start:], unicode.IsSpace)
-		}
-		if asciiSpace[c] == 0 {
-			break
+			// slower unicode-aware method on the remaining bytes.
+			return TrimFunc(s[lo:], unicode.IsSpace)
 		}
-	}
-
-	// Now look for the first ASCII non-space byte from the end
-	stop := len(s)
-	for ; stop > start; stop-- {
-		c := s[stop-1]
-		if c >= utf8.RuneSelf {
-			return TrimFunc(s[start:stop], unicode.IsSpace)
+		if asciiSpace[c] != 0 {
+			continue
 		}
-		if asciiSpace[c] == 0 {
-			break
+		s = s[lo:]
+		// Now look for the first ASCII non-space byte from the end.
+		for hi := len(s) - 1; hi >= 0; hi-- {
+			c := s[hi]
+			if c >= utf8.RuneSelf {
+				return TrimFunc(s[:hi+1], unicode.IsSpace)
+			}
+			if asciiSpace[c] == 0 {
+				// At this point, s[:hi+1] starts and ends with ASCII
+				// non-space bytes, so we're done. Non-ASCII cases have
+				// already been handled above.
+				return s[:hi+1]
+			}
 		}
 	}
-
-	// At this point s[start:stop] starts and ends with an ASCII
-	// non-space bytes, so we're done. Non-ASCII cases have already
-	// been handled above.
-	if start == stop {
-		// Special case to preserve previous TrimLeftFunc behavior,
-		// returning nil instead of empty slice if all spaces.
-		return nil
-	}
-	return s[start:stop]
+	// Special case to preserve previous TrimLeftFunc behavior,
+	// returning nil instead of empty slice if all spaces.
+	return nil
 }
 
 // Runes interprets s as a sequence of UTF-8-encoded code points.
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 91c6ddef66..74007977d9 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -1091,37 +1091,32 @@ func trimRightUnicode(s, cutset string) string {
 // TrimSpace returns a slice of the string s, with all leading
 // and trailing white space removed, as defined by Unicode.
 func TrimSpace(s string) string {
-	// Fast path for ASCII: look for the first ASCII non-space byte
-	start := 0
-	for ; start < len(s); start++ {
-		c := s[start]
+	// Fast path for ASCII: look for the first ASCII non-space byte.
+	for lo, c := range []byte(s) {
 		if c >= utf8.RuneSelf {
 			// If we run into a non-ASCII byte, fall back to the
-			// slower unicode-aware method on the remaining bytes
-			return TrimFunc(s[start:], unicode.IsSpace)
-		}
-		if asciiSpace[c] == 0 {
-			break
+			// slower unicode-aware method on the remaining bytes.
+			return TrimFunc(s[lo:], unicode.IsSpace)
 		}
-	}
-
-	// Now look for the first ASCII non-space byte from the end
-	stop := len(s)
-	for ; stop > start; stop-- {
-		c := s[stop-1]
-		if c >= utf8.RuneSelf {
-			// start has been already trimmed above, should trim end only
-			return TrimRightFunc(s[start:stop], unicode.IsSpace)
+		if asciiSpace[c] != 0 {
+			continue
 		}
-		if asciiSpace[c] == 0 {
-			break
+		s = s[lo:]
+		// Now look for the first ASCII non-space byte from the end.
+		for hi := len(s) - 1; hi >= 0; hi-- {
+			c := s[hi]
+			if c >= utf8.RuneSelf {
+				return TrimRightFunc(s[:hi+1], unicode.IsSpace)
+			}
+			if asciiSpace[c] == 0 {
+				// At this point, s[:hi+1] starts and ends with ASCII
+				// non-space bytes, so we're done. Non-ASCII cases have
+				// already been handled above.
+				return s[:hi+1]
+			}
 		}
 	}
-
-	// At this point s[start:stop] starts and ends with an ASCII
-	// non-space bytes, so we're done. Non-ASCII cases have already
-	// been handled above.
-	return s[start:stop]
+	return ""
 }
 
 // TrimPrefix returns s without the provided leading prefix string.