From 0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 7 Mar 2018 14:14:19 -0800 Subject: [PATCH] encoding/csv: avoid mangling invalid UTF-8 in Writer In the situation where a quoted field is necessary, avoid processing each UTF-8 rune one-by-one, which causes mangling of invalid sequences into utf8.RuneError, causing a loss of information. Instead, search only for the escaped characters, handle those specially and copy everything else in between verbatim. This symmetrically matches the behavior of Reader. Fixes #24298 Change-Id: I9276f64891084ce8487678f663fad711b4095dbb Reviewed-on: https://go-review.googlesource.com/99297 Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/encoding/csv/writer.go | 51 +++++++++++++++++++++------------ src/encoding/csv/writer_test.go | 2 ++ 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go index ef3594e523..31c4f9c22d 100644 --- a/src/encoding/csv/writer.go +++ b/src/encoding/csv/writer.go @@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error { } continue } + if err := w.w.WriteByte('"'); err != nil { return err } + for len(field) > 0 { + // Search for special characters. + i := strings.IndexAny(field, "\"\r\n") + if i < 0 { + i = len(field) + } + + // Copy verbatim everything before the special character. + if _, err := w.w.WriteString(field[:i]); err != nil { + return err + } + field = field[i:] - for _, r1 := range field { - var err error - switch r1 { - case '"': - _, err = w.w.WriteString(`""`) - case '\r': - if !w.UseCRLF { - err = w.w.WriteByte('\r') + // Encode the special character. + if len(field) > 0 { + var err error + switch field[0] { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } } - case '\n': - if w.UseCRLF { - _, err = w.w.WriteString("\r\n") - } else { - err = w.w.WriteByte('\n') + field = field[1:] + if err != nil { + return err } - default: - _, err = w.w.WriteRune(r1) - } - if err != nil { - return err } } - if err := w.w.WriteByte('"'); err != nil { return err } diff --git a/src/encoding/csv/writer_test.go b/src/encoding/csv/writer_test.go index 8ddca0abe0..99bc84e998 100644 --- a/src/encoding/csv/writer_test.go +++ b/src/encoding/csv/writer_test.go @@ -39,6 +39,8 @@ var writeTests = []struct { {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, + {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"}, + {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"}, } func TestWrite(t *testing.T) { -- 2.48.1