From 2bded9dc19a65005fbcb37ab1e51a9237998b09e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20M=C3=B6hrmann?= Date: Fri, 4 May 2018 06:54:18 +0200 Subject: [PATCH] [release-branch.go1.10] strings: fix encoding of \u0080 in map MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf being incorrectly encoded as \x80 in strings.Map due to using <= instead of a < comparison operator to check one byte encodings for utf8. Fixes golang/go#25479 Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc Reviewed-on: https://go-review.googlesource.com/111286 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick (cherry picked from commit 8c62fc0ca3c96ecbd3a6e81546aa8c53e32ff500) Reviewed-on: https://go-review.googlesource.com/114635 Run-TryBot: Andrew Bonventre --- src/strings/strings.go | 4 ++-- src/strings/strings_test.go | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/strings/strings.go b/src/strings/strings.go index 02c032046b..05e8243b2e 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -474,7 +474,7 @@ func Map(mapping func(rune) rune, s string) string { b = make([]byte, len(s)+utf8.UTFMax) nbytes = copy(b, s[:i]) if r >= 0 { - if r <= utf8.RuneSelf { + if r < utf8.RuneSelf { b[nbytes] = byte(r) nbytes++ } else { @@ -504,7 +504,7 @@ func Map(mapping func(rune) rune, s string) string { r := mapping(c) // common case - if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) { + if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) { b[nbytes] = byte(r) nbytes++ continue diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index d8fcb62a87..6c1193873b 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -528,6 +528,7 @@ var upperTests = []StringTest{ {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char + {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } var lowerTests = []StringTest{ @@ -538,6 +539,7 @@ var lowerTests = []StringTest{ {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char + {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" @@ -650,6 +652,27 @@ func TestMap(t *testing.T) { if m != expect { t.Errorf("replace invalid sequence: expected %q got %q", expect, m) } + + // 8. Check utf8.RuneSelf and utf8.MaxRune encoding + encode := func(r rune) rune { + switch r { + case utf8.RuneSelf: + return unicode.MaxRune + case unicode.MaxRune: + return utf8.RuneSelf + } + return r + } + s := string(utf8.RuneSelf) + string(utf8.MaxRune) + r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s + m = Map(encode, s) + if m != r { + t.Errorf("encoding not handled correctly: expected %q got %q", r, m) + } + m = Map(encode, r) + if m != s { + t.Errorf("encoding not handled correctly: expected %q got %q", s, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } -- 2.50.0