Cypherpunks repositories - gostls13.git/commitdiff
strings: fix handling of invalid UTF-8 sequences in Map
author Martin Möhrmann <moehrmann@google.com>
Tue, 28 Feb 2017 20:21:45 +0000 (21:21 +0100)
committer Martin Möhrmann <moehrmann@google.com>
Tue, 28 Feb 2017 22:50:53 +0000 (22:50 +0000)
The new Map implementation introduced in golang.org/cl/33201
did not distinguish between decoding an invalid UTF-8 sequence
and decoding the RuneError rune itself. It would therefore always
advance by 3 bytes (the encoded length of the RuneError rune)
instead of 1 byte for an invalid sequence. This CL adds a check to
correctly determine the number of bytes needed to advance to the next rune.

Fixes #19330.

Change-Id: I1e7f9333f3ef6068ffc64015bb0a9f32b0b7111d
Reviewed-on: https://go-review.googlesource.com/37597
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/strings/strings.go
src/strings/strings_test.go

index 188d8cbc09345e14bce76a7c5824dfce880cc559..9ca222fdfa6dcc7b3075ffcc2d5b9d73a16a58c4 100644 (file)
@@ -406,7 +406,17 @@ func Map(mapping func(rune) rune, s string) string {
                                nbytes += utf8.EncodeRune(b[nbytes:], r)
                        }
                }
-               i += utf8.RuneLen(c)
+
+               if c == utf8.RuneError {
+                       // RuneError is the result of either decoding
+                       // an invalid sequence or '\uFFFD'. Determine
+                       // the correct number of bytes we need to advance.
+                       _, w := utf8.DecodeRuneInString(s[i:])
+                       i += w
+               } else {
+                       i += utf8.RuneLen(c)
+               }
+
                s = s[i:]
                break
        }
index 3378d54fe20ac789ce4339363ab4b592926a8916..97041eb9aca71cd31d192b857c7e4a8668868901 100644 (file)
@@ -625,6 +625,19 @@ func TestMap(t *testing.T) {
                (*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
                t.Error("unexpected copy during identity map")
        }
+
+       // 7. Handle invalid UTF-8 sequence
+       replaceNotLatin := func(r rune) rune {
+               if unicode.Is(unicode.Latin, r) {
+                       return r
+               }
+               return '?'
+       }
+       m = Map(replaceNotLatin, "Hello\255World")
+       expect = "Hello?World"
+       if m != expect {
+               t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
+       }
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }