From 572941942026e705a685860a170a60ce371b050e Mon Sep 17 00:00:00 2001 From: Andy Pan Date: Tue, 16 Aug 2022 16:06:23 +0800 Subject: [PATCH] strings: speed up ToUpper()/ToLower() by batch writing data with Builder MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Updates #52371 Updates CL 423874 name old time/op new time/op delta ToUpper/#00-10 2.85ns ± 0% 2.81ns ± 0% -1.31% (p=0.000 n=10+10) ToUpper/ONLYUPPER-10 12.7ns ± 0% 12.5ns ± 0% -1.35% (p=0.000 n=10+10) ToUpper/abc-10 20.9ns ± 1% 20.1ns ± 1% -3.92% (p=0.000 n=8+10) ToUpper/AbC123-10 26.9ns ± 1% 28.5ns ± 0% +5.78% (p=0.000 n=9+9) ToUpper/azAZ09_-10 27.4ns ± 1% 24.5ns ± 0% -10.82% (p=0.000 n=9+9) ToUpper/longStrinGwitHmixofsmaLLandcAps-10 95.9ns ± 1% 100.3ns ± 0% +4.52% (p=0.000 n=9+10) ToUpper/RENAN_BASTOS_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 188ns ± 0% 121ns ± 0% -35.52% (p=0.000 n=9+10) ToUpper/longɐstringɐwithɐnonasciiⱯchars-10 168ns ± 0% 164ns ± 0% -2.02% (p=0.000 n=8+10) ToUpper/ɐɐɐɐɐ-10 134ns ± 0% 132ns ± 0% -1.59% (p=0.000 n=9+10) ToUpper/a\u0080\U0010ffff-10 67.6ns ± 0% 66.4ns ± 0% -1.73% (p=0.000 n=10+10) ToLower/#00-10 2.87ns ± 4% 2.83ns ± 0% -1.46% (p=0.004 n=9+9) ToLower/abc-10 6.35ns ± 0% 6.29ns ± 0% -0.98% (p=0.000 n=9+9) ToLower/AbC123-10 25.6ns ± 1% 28.1ns ± 1% +9.81% (p=0.000 n=10+10) ToLower/azAZ09_-10 29.9ns ± 1% 30.1ns ± 1% +0.64% (p=0.023 n=9+10) ToLower/longStrinGwitHmixofsmaLLandcAps-10 96.7ns ± 1% 73.0ns ± 0% -24.50% (p=0.000 n=10+10) ToLower/renan_bastos_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 177ns ± 0% 118ns ± 0% -33.61% (p=0.000 n=7+8) ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-10 159ns ± 1% 158ns ± 0% -0.97% (p=0.000 n=8+10) ToLower/ⱭⱭⱭⱭⱭ-10 125ns ± 1% 123ns ± 1% -1.67% (p=0.000 n=9+9) ToLower/A\u0080\U0010ffff-10 68.4ns ± 1% 67.1ns ± 0% -1.95% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ToUpper/#00-10 0.00B 0.00B ~ (all equal) ToUpper/ONLYUPPER-10 0.00B 0.00B ~ (all equal) ToUpper/abc-10 3.00B ± 0% 3.00B ± 0% ~ (all equal) ToUpper/AbC123-10 8.00B ± 0% 8.00B ± 0% ~ (all equal) ToUpper/azAZ09_-10 8.00B ± 0% 8.00B ± 0% ~ (all equal) ToUpper/longStrinGwitHmixofsmaLLandcAps-10 32.0B ± 0% 32.0B ± 0% ~ (all equal) ToUpper/RENAN_BASTOS_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 64.0B ± 0% 64.0B ± 0% ~ (all equal) ToUpper/longɐstringɐwithɐnonasciiⱯchars-10 48.0B ± 0% 48.0B ± 0% ~ (all equal) ToUpper/ɐɐɐɐɐ-10 48.0B ± 0% 48.0B ± 0% ~ (all equal) ToUpper/a\u0080\U0010ffff-10 16.0B ± 0% 16.0B ± 0% ~ (all equal) ToLower/#00-10 0.00B 0.00B ~ (all equal) ToLower/abc-10 0.00B 0.00B ~ (all equal) ToLower/AbC123-10 8.00B ± 0% 8.00B ± 0% ~ (all equal) ToLower/azAZ09_-10 8.00B ± 0% 8.00B ± 0% ~ (all equal) ToLower/longStrinGwitHmixofsmaLLandcAps-10 32.0B ± 0% 32.0B ± 0% ~ (all equal) ToLower/renan_bastos_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 64.0B ± 0% 64.0B ± 0% ~ (all equal) ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-10 48.0B ± 0% 48.0B ± 0% ~ (all equal) ToLower/ⱭⱭⱭⱭⱭ-10 24.0B ± 0% 24.0B ± 0% ~ (all equal) ToLower/A\u0080\U0010ffff-10 16.0B ± 0% 16.0B ± 0% ~ (all equal) name old allocs/op new allocs/op delta ToUpper/#00-10 0.00 0.00 ~ (all equal) ToUpper/ONLYUPPER-10 0.00 0.00 ~ (all equal) ToUpper/abc-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/AbC123-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/azAZ09_-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/longStrinGwitHmixofsmaLLandcAps-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/RENAN_BASTOS_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/longɐstringɐwithɐnonasciiⱯchars-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToUpper/ɐɐɐɐɐ-10 2.00 ± 0% 2.00 ± 0% ~ (all equal) ToUpper/a\u0080\U0010ffff-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/#00-10 0.00 0.00 ~ (all equal) ToLower/abc-10 0.00 0.00 ~ (all equal) ToLower/AbC123-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/azAZ09_-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/longStrinGwitHmixofsmaLLandcAps-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/renan_bastos_93_AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/ⱭⱭⱭⱭⱭ-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) ToLower/A\u0080\U0010ffff-10 1.00 ± 0% 1.00 ± 0% ~ (all equal) Change-Id: Id3998ac4bae054ba3e6cf30545a257d5992b48be Reviewed-on: https://go-review.googlesource.com/c/go/+/424139 Run-TryBot: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Run-TryBot: Cuong Manh Le TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Reviewed-by: Joedian Reid --- src/strings/strings.go | 28 ++++++++++++++++++++++++---- src/strings/strings_test.go | 2 ++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/strings/strings.go b/src/strings/strings.go index 4ab78c326b..7921a20e8a 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -568,14 +568,24 @@ func ToUpper(s string) string { if !hasLower { return s } - var b Builder + var ( + b Builder + pos int + ) b.Grow(len(s)) for i := 0; i < len(s); i++ { c := s[i] if 'a' <= c && c <= 'z' { c -= 'a' - 'A' + if pos < i { + b.WriteString(s[pos:i]) + } + b.WriteByte(c) + pos = i + 1 } - b.WriteByte(c) + } + if pos < len(s) { + b.WriteString(s[pos:]) } return b.String() } @@ -598,14 +608,24 @@ func ToLower(s string) string { if !hasUpper { return s } - var b Builder + var ( + b Builder + pos int + ) b.Grow(len(s)) for i := 0; i < len(s); i++ { c := s[i] if 'A' <= c && c <= 'Z' { c += 'a' - 'A' + if pos < i { + b.WriteString(s[pos:i]) + } + b.WriteByte(c) + pos = i + 1 } - b.WriteByte(c) + } + if pos < len(s) { + b.WriteString(s[pos:]) } return b.String() } diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index d6153aa226..6d394f47be 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -549,6 +549,7 @@ var upperTests = []StringTest{ {"AbC123", "ABC123"}, {"azAZ09_", "AZAZ09_"}, {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, + {"RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIAIDSJJAIDIJADSJIADJIOOKKO"}, {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune @@ -560,6 +561,7 @@ var lowerTests = []StringTest{ {"AbC123", "abc123"}, {"azAZ09_", "azaz09_"}, {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, + {"renan bastos 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "renan bastos 93 aosdajdjaidjaidajiaidsjjaidijadsjiadjiookko"}, {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune -- 2.48.1