Cypherpunks repositories - gostls13.git/commitdiff
encoding/csv: avoid mangling invalid UTF-8 in Writer
author: Joe Tsai <joetsai@digital-static.net>
Wed, 7 Mar 2018 22:14:19 +0000 (14:14 -0800)
committer: Joe Tsai <thebrokentoaster@gmail.com>
Thu, 8 Mar 2018 03:26:22 +0000 (03:26 +0000)
In the situation where a quoted field is necessary, avoid processing
each UTF-8 rune one-by-one, which causes mangling of invalid sequences
into utf8.RuneError, causing a loss of information.
Instead, search only for the escaped characters, handle those specially
and copy everything else in between verbatim.

This symmetrically matches the behavior of Reader.

Fixes #24298

Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
src/encoding/csv/writer.go
src/encoding/csv/writer_test.go

index ef3594e523c9edeadd8d4008701c19db7beaf6df..31c4f9c22dcfe6ffd9abd41e7d72bf7e3a8040d8 100644 (file)
@@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
                        }
                        continue
                }
+
                if err := w.w.WriteByte('"'); err != nil {
                        return err
                }
+               for len(field) > 0 {
+                       // Search for special characters.
+                       i := strings.IndexAny(field, "\"\r\n")
+                       if i < 0 {
+                               i = len(field)
+                       }
+
+                       // Copy verbatim everything before the special character.
+                       if _, err := w.w.WriteString(field[:i]); err != nil {
+                               return err
+                       }
+                       field = field[i:]
 
-               for _, r1 := range field {
-                       var err error
-                       switch r1 {
-                       case '"':
-                               _, err = w.w.WriteString(`""`)
-                       case '\r':
-                               if !w.UseCRLF {
-                                       err = w.w.WriteByte('\r')
+                       // Encode the special character.
+                       if len(field) > 0 {
+                               var err error
+                               switch field[0] {
+                               case '"':
+                                       _, err = w.w.WriteString(`""`)
+                               case '\r':
+                                       if !w.UseCRLF {
+                                               err = w.w.WriteByte('\r')
+                                       }
+                               case '\n':
+                                       if w.UseCRLF {
+                                               _, err = w.w.WriteString("\r\n")
+                                       } else {
+                                               err = w.w.WriteByte('\n')
+                                       }
                                }
-                       case '\n':
-                               if w.UseCRLF {
-                                       _, err = w.w.WriteString("\r\n")
-                               } else {
-                                       err = w.w.WriteByte('\n')
+                               field = field[1:]
+                               if err != nil {
+                                       return err
                                }
-                       default:
-                               _, err = w.w.WriteRune(r1)
-                       }
-                       if err != nil {
-                               return err
                        }
                }
-
                if err := w.w.WriteByte('"'); err != nil {
                        return err
                }
index 8ddca0abe0c13f09e13bc4a30718d09d17829541..99bc84e998a2a2f37cabf6e7c74f09fbb2678e7a 100644 (file)
@@ -39,6 +39,8 @@ var writeTests = []struct {
        {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
        {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
        {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
+       {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
+       {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
 }
 
 func TestWrite(t *testing.T) {