]> Cypherpunks repositories - gostls13.git/commitdiff
strings: fix encoding of \u0080 in map
authorMartin Möhrmann <moehrmann@google.com>
Fri, 4 May 2018 04:54:18 +0000 (06:54 +0200)
committerBrad Fitzpatrick <bradfitz@golang.org>
Fri, 4 May 2018 05:37:45 +0000 (05:37 +0000)
Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf
being incorrectly encoded as \x80 in strings.Map due to using <= instead
of a < comparison operator to check one byte encodings for utf8.

Fixes #25242

Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/strings/strings.go
src/strings/strings_test.go

index 45345e008888e27246b2f4412211192d4bf27cb2..adbbe742fcebe65a5d62b204113238bc57ddac67 100644 (file)
@@ -479,7 +479,7 @@ func Map(mapping func(rune) rune, s string) string {
                b = make([]byte, len(s)+utf8.UTFMax)
                nbytes = copy(b, s[:i])
                if r >= 0 {
-                       if r <= utf8.RuneSelf {
+                       if r < utf8.RuneSelf {
                                b[nbytes] = byte(r)
                                nbytes++
                        } else {
@@ -509,7 +509,7 @@ func Map(mapping func(rune) rune, s string) string {
                r := mapping(c)
 
                // common case
-               if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
+               if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
                        b[nbytes] = byte(r)
                        nbytes++
                        continue
index 876f06c67482b96886363fed9ae968900c4aba30..78bc573e5f0bf18064d521c6c0bfadaf48033643 100644 (file)
@@ -528,6 +528,7 @@ var upperTests = []StringTest{
        {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
        {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
        {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
+       {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 var lowerTests = []StringTest{
@@ -538,6 +539,7 @@ var lowerTests = []StringTest{
        {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
        {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
        {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
+       {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
@@ -650,6 +652,27 @@ func TestMap(t *testing.T) {
        if m != expect {
                t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
        }
+
+       // 8. Check utf8.RuneSelf and utf8.MaxRune encoding
+       encode := func(r rune) rune {
+               switch r {
+               case utf8.RuneSelf:
+                       return unicode.MaxRune
+               case unicode.MaxRune:
+                       return utf8.RuneSelf
+               }
+               return r
+       }
+       s := string(utf8.RuneSelf) + string(utf8.MaxRune)
+       r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
+       m = Map(encode, s)
+       if m != r {
+               t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
+       }
+       m = Map(encode, r)
+       if m != s {
+               t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
+       }
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }