]> Cypherpunks repositories - gostls13.git/commitdiff
strings: optimize ToUpper
authorAgniva De Sarker <agnivade@yahoo.co.in>
Thu, 28 Sep 2017 09:37:48 +0000 (15:07 +0530)
committerIlya Tocar <ilya.tocar@intel.com>
Tue, 7 Nov 2017 18:44:50 +0000 (18:44 +0000)
Handling the ASCII case inline and call unicode.ToUpper only
for non-ascii cases.

Gives good improvements for the ascii case and minor perf
degrade for non-ascii case

name                                     old time/op    new time/op    delta
ToUpper/#00                                11.7ns ± 8%     8.0ns ± 1%  -31.95%  (p=0.008 n=5+5)
ToUpper/ONLYUPPER                          45.6ns ± 5%    19.9ns ± 1%  -56.40%  (p=0.008 n=5+5)
ToUpper/abc                                77.4ns ± 1%    57.0ns ± 1%  -26.32%  (p=0.008 n=5+5)
ToUpper/AbC123                             92.1ns ± 4%    67.7ns ± 2%  -26.57%  (p=0.008 n=5+5)
ToUpper/azAZ09_                             105ns ± 6%      67ns ± 2%  -36.26%  (p=0.000 n=5+4)
ToUpper/longStrinGwitHmixofsmaLLandcAps     255ns ± 1%     140ns ± 1%  -45.01%  (p=0.029 n=4+4)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     440ns ± 1%     447ns ± 0%   +1.49%  (p=0.016 n=5+4)
ToUpper/ɐɐɐɐɐ                               370ns ± 4%     366ns ± 1%     ~     (p=0.667 n=5+5)

name                                     old alloc/op   new alloc/op   delta
ToUpper/#00                                 0.00B          0.00B          ~     (all equal)
ToUpper/ONLYUPPER                           0.00B          0.00B          ~     (all equal)
ToUpper/abc                                 16.0B ± 0%      6.0B ± 0%  -62.50%  (p=0.008 n=5+5)
ToUpper/AbC123                              16.0B ± 0%     16.0B ± 0%     ~     (all equal)
ToUpper/azAZ09_                             24.0B ± 0%     16.0B ± 0%  -33.33%  (p=0.008 n=5+5)
ToUpper/longStrinGwitHmixofsmaLLandcAps     80.0B ± 0%     64.0B ± 0%  -20.00%  (p=0.008 n=5+5)
ToUpper/longɐstringɐwithɐnonasciiⱯchars     96.0B ± 0%     96.0B ± 0%     ~     (all equal)
ToUpper/ɐɐɐɐɐ                               64.0B ± 0%     64.0B ± 0%     ~     (all equal)

Ran on a machine with Intel(R) Core(TM) i5-5200U CPU @ 2.20GHz

Updates #17859

Change-Id: I0735ac4a4a36e8a8f6cc06f2c16b871f12b4abf9
Reviewed-on: https://go-review.googlesource.com/68370
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/strings/strings.go
src/strings/strings_test.go

index a7941fbb90f42ed52f386bf5fddb03fe72363a3a..73bba9278c1023baadbb4dc28fbf3cae7b5df6ad 100644 (file)
@@ -542,7 +542,33 @@ func Repeat(s string, count int) string {
 }
 
 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
-func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
+func ToUpper(s string) string {
+       isASCII, hasLower := true, false
+       for i := 0; i < len(s); i++ {
+               c := s[i]
+               if c >= utf8.RuneSelf {
+                       isASCII = false
+                       break
+               }
+               hasLower = hasLower || (c >= 'a' && c <= 'z')
+       }
+
+       if isASCII { // optimize for ASCII-only strings.
+               if !hasLower {
+                       return s
+               }
+               b := make([]byte, len(s))
+               for i := 0; i < len(s); i++ {
+                       c := s[i]
+                       if c >= 'a' && c <= 'z' {
+                               c -= 'a' - 'A'
+                       }
+                       b[i] = c
+               }
+               return string(b)
+       }
+       return Map(unicode.ToUpper, s)
+}
 
 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
 func ToLower(s string) string { return Map(unicode.ToLower, s) }
index 869be9c4778a6af7a5b679b1c02bfd9534dc28c5..b185e7eec8700e08458550a22781cc7593324e87 100644 (file)
@@ -518,9 +518,12 @@ func runStringTests(t *testing.T, f func(string) string, funcName string, testCa
 
 var upperTests = []StringTest{
        {"", ""},
+       {"ONLYUPPER", "ONLYUPPER"},
        {"abc", "ABC"},
        {"AbC123", "ABC123"},
        {"azAZ09_", "AZAZ09_"},
+       {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
+       {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
        {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
 }
 
@@ -648,6 +651,19 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
 
 func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
 
+func BenchmarkToUpper(b *testing.B) {
+       for _, tc := range upperTests {
+               b.Run(tc.in, func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               actual := ToUpper(tc.in)
+                               if actual != tc.out {
+                                       b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
+                               }
+                       }
+               })
+       }
+}
+
 func BenchmarkMapNoChanges(b *testing.B) {
        identity := func(r rune) rune {
                return r