]> Cypherpunks repositories - gostls13.git/commitdiff
bytes: optimize ToLower and ToUpper for ASCII-only case
authorTobias Klauser <tklauser@distanz.ch>
Mon, 8 Apr 2019 13:53:35 +0000 (15:53 +0200)
committerTobias Klauser <tobias.klauser@gmail.com>
Tue, 9 Apr 2019 05:45:34 +0000 (05:45 +0000)
Follow what CL 68370 and CL 76470 did for the respective functions in
package strings.

Also adjust godoc strings to match the respective strings functions and
mention the special case for ASCII-only byte slices which don't need
conversion.

name                                       old time/op  new time/op  delta
ToUpper/#00-8                              9.35ns ± 3%  6.08ns ± 2%  -35.04%  (p=0.000 n=9+9)
ToUpper/ONLYUPPER-8                        77.7ns ± 1%  16.9ns ± 2%  -78.22%  (p=0.000 n=10+10)
ToUpper/abc-8                              36.5ns ± 1%  22.1ns ± 1%  -39.43%  (p=0.000 n=10+8)
ToUpper/AbC123-8                           56.9ns ± 2%  28.2ns ± 2%  -50.54%  (p=0.000 n=8+10)
ToUpper/azAZ09_-8                          62.3ns ± 1%  26.9ns ± 1%  -56.82%  (p=0.000 n=9+10)
ToUpper/longStrinGwitHmixofsmaLLandcAps-8   219ns ± 2%    63ns ± 2%  -71.17%  (p=0.000 n=10+10)
ToUpper/longɐstringɐwithɐnonasciiⱯchars-8   367ns ± 2%   374ns ± 3%   +2.05%  (p=0.000 n=9+10)
ToUpper/ɐɐɐɐɐ-8                             200ns ± 1%   206ns ± 1%   +2.49%  (p=0.000 n=10+10)
ToUpper/a\u0080\U0010ffff-8                90.4ns ± 1%  93.8ns ± 0%   +3.82%  (p=0.000 n=10+7)
ToLower/#00-8                              9.59ns ± 1%  6.13ns ± 2%  -36.08%  (p=0.000 n=10+10)
ToLower/abc-8                              36.4ns ± 1%  10.4ns ± 1%  -71.50%  (p=0.000 n=10+10)
ToLower/AbC123-8                           55.8ns ± 1%  27.5ns ± 1%  -50.61%  (p=0.000 n=10+10)
ToLower/azAZ09_-8                          61.7ns ± 1%  30.2ns ± 1%  -50.98%  (p=0.000 n=8+10)
ToLower/longStrinGwitHmixofsmaLLandcAps-8   226ns ± 1%    64ns ± 1%  -71.53%  (p=0.000 n=10+9)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-8   354ns ± 0%   361ns ± 0%   +2.18%  (p=0.000 n=10+10)
ToLower/ⱭⱭⱭⱭⱭ-8                             180ns ± 1%   186ns ± 0%   +3.45%  (p=0.000 n=10+9)
ToLower/A\u0080\U0010ffff-8                91.7ns ± 0%  94.5ns ± 0%   +2.99%  (p=0.000 n=10+10)

Change-Id: Ifdb8ae328ff9feacd1c170db8eebbf98c399e204
Reviewed-on: https://go-review.googlesource.com/c/go/+/170954
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/bytes/bytes.go
src/bytes/bytes_test.go

index bdd55fca4a75490c3f0cd1daa4658cd33f00f00e..22aeded5e134a0af2c5091a0b33fd75a86e223fd 100644 (file)
@@ -521,11 +521,66 @@ func Repeat(b []byte, count int) []byte {
        return nb
 }
 
-// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
-func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
+// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
+// their upper case.
+func ToUpper(s []byte) []byte {
+       isASCII, hasLower := true, false
+       for i := 0; i < len(s); i++ {
+               c := s[i]
+               if c >= utf8.RuneSelf {
+                       isASCII = false
+                       break
+               }
+               hasLower = hasLower || ('a' <= c && c <= 'z')
+       }
+
+       if isASCII { // optimize for ASCII-only byte slices.
+               if !hasLower {
+                       // Just return a copy.
+                       return append([]byte(""), s...)
+               }
+               b := make([]byte, len(s))
+               for i := 0; i < len(s); i++ {
+                       c := s[i]
+                       if 'a' <= c && c <= 'z' {
+                               c -= 'a' - 'A'
+                       }
+                       b[i] = c
+               }
+               return b
+       }
+       return Map(unicode.ToUpper, s)
+}
 
-// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
-func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
+// ToLower returns a copy of the byte slice s with all Unicode letters mapped to
+// their lower case.
+func ToLower(s []byte) []byte {
+       isASCII, hasUpper := true, false
+       for i := 0; i < len(s); i++ {
+               c := s[i]
+               if c >= utf8.RuneSelf {
+                       isASCII = false
+                       break
+               }
+               hasUpper = hasUpper || ('A' <= c && c <= 'Z')
+       }
+
+       if isASCII { // optimize for ASCII-only byte slices.
+               if !hasUpper {
+                       return append([]byte(""), s...)
+               }
+               b := make([]byte, len(s))
+               for i := 0; i < len(s); i++ {
+                       c := s[i]
+                       if 'A' <= c && c <= 'Z' {
+                               c += 'a' - 'A'
+                       }
+                       b[i] = c
+               }
+               return b
+       }
+       return Map(unicode.ToLower, s)
+}
 
 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
index d760d4b52ab34033b46e05599ca72a9a5662269e..340810facf4056b38385d2eb380df83af0c30523 100644 (file)
@@ -891,10 +891,14 @@ type StringTest struct {
 
 var upperTests = []StringTest{
        {"", []byte("")},
+       {"ONLYUPPER", []byte("ONLYUPPER")},
        {"abc", []byte("ABC")},
        {"AbC123", []byte("ABC123")},
        {"azAZ09_", []byte("AZAZ09_")},
+       {"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
+       {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
        {"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
+       {"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 var lowerTests = []StringTest{
@@ -902,7 +906,10 @@ var lowerTests = []StringTest{
        {"abc", []byte("abc")},
        {"AbC123", []byte("abc123")},
        {"azAZ09_", []byte("azaz09_")},
+       {"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
+       {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
        {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
+       {"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
@@ -1029,6 +1036,34 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
 
 func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
 
+func BenchmarkToUpper(b *testing.B) {
+       for _, tc := range upperTests {
+               tin := []byte(tc.in)
+               b.Run(tc.in, func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               actual := ToUpper(tin)
+                               if !Equal(actual, tc.out) {
+                                       b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
+                               }
+                       }
+               })
+       }
+}
+
+func BenchmarkToLower(b *testing.B) {
+       for _, tc := range lowerTests {
+               tin := []byte(tc.in)
+               b.Run(tc.in, func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               actual := ToLower(tin)
+                               if !Equal(actual, tc.out) {
+                                       b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
+                               }
+                       }
+               })
+       }
+}
+
 func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
 
 type RepeatTest struct {