From f2bc275525807e1c83f524325d03cd6e7e18fe7d Mon Sep 17 00:00:00 2001 From: Pieter Droogendijk Date: Fri, 9 Aug 2013 15:46:01 +1000 Subject: [PATCH] encoding/csv: always allow trailing commas Original CL by rsc (11916045): The motivation for disallowing them was RFC 4180 saying "The last field in the record must not be followed by a comma." I believe this is an admonition to CSV generators, not readers. When reading, anything followed by a comma is not the last field. Fixes #5892. R=golang-dev, rsc, r CC=golang-dev https://golang.org/cl/12294043 --- src/pkg/encoding/csv/reader.go | 58 +++++++---------------------- src/pkg/encoding/csv/reader_test.go | 33 ++++++++-------- 2 files changed, 32 insertions(+), 59 deletions(-) diff --git a/src/pkg/encoding/csv/reader.go b/src/pkg/encoding/csv/reader.go index b099caf60a..b328dcc375 100644 --- a/src/pkg/encoding/csv/reader.go +++ b/src/pkg/encoding/csv/reader.go @@ -72,7 +72,7 @@ func (e *ParseError) Error() string { // These are the errors that can be returned in ParseError.Error var ( - ErrTrailingComma = errors.New("extra delimiter at end of line") + ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used ErrBareQuote = errors.New("bare \" in non-quoted-field") ErrQuote = errors.New("extraneous \" in field") ErrFieldCount = errors.New("wrong number of fields in line") @@ -98,16 +98,14 @@ var ( // If LazyQuotes is true, a quote may appear in an unquoted field and a // non-doubled quote may appear in a quoted field. // -// If TrailingComma is true, the last field may be an unquoted empty field. -// // If TrimLeadingSpace is true, leading white space in a field is ignored. type Reader struct { - Comma rune // Field delimiter (set to ',' by NewReader) - Comment rune // Comment character for start of line - FieldsPerRecord int // Number of expected fields per record - LazyQuotes bool // Allow lazy quotes - TrailingComma bool // Allow trailing comma - TrimLeadingSpace bool // Trim leading space + Comma rune // field delimiter (set to ',' by NewReader) + Comment rune // comment character for start of line + FieldsPerRecord int // number of expected fields per record + LazyQuotes bool // allow lazy quotes + TrailingComma bool // ignored; here for backwards compatibility + TrimLeadingSpace bool // trim leading space line int column int r *bufio.Reader @@ -257,23 +255,15 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) { r.field.Reset() r1, err := r.readRune() - if err != nil { - // If we have EOF and are not at the start of a line - // then we return the empty field. We have already - // checked for trailing commas if needed. - if err == io.EOF && r.column != 0 { - return true, 0, err - } - return false, 0, err + for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) { + r1, err = r.readRune() } - if r.TrimLeadingSpace { - for r1 != '\n' && unicode.IsSpace(r1) { - r1, err = r.readRune() - if err != nil { - return false, 0, err - } - } + if err == io.EOF && r.column != 0 { + return true, 0, err + } + if err != nil { + return false, 0, err } switch r1 { @@ -349,25 +339,5 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) { return false, 0, err } - if !r.TrailingComma { - // We don't allow trailing commas. See if we - // are at the end of the line (being mindful - // of trimming spaces). - c := r.column - r1, err = r.readRune() - if r.TrimLeadingSpace { - for r1 != '\n' && unicode.IsSpace(r1) { - r1, err = r.readRune() - if err != nil { - break - } - } - } - if err == io.EOF || r1 == '\n' { - r.column = c // report the comma - return false, 0, r.error(ErrTrailingComma) - } - r.unreadRune() - } return true, r1, nil } diff --git a/src/pkg/encoding/csv/reader_test.go b/src/pkg/encoding/csv/reader_test.go index 5fd84a76bd..123df06bc8 100644 --- a/src/pkg/encoding/csv/reader_test.go +++ b/src/pkg/encoding/csv/reader_test.go @@ -171,32 +171,32 @@ field"`, Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, }, { - Name: "BadTrailingCommaEOF", - Input: "a,b,c,", - Error: "extra delimiter at end of line", Line: 1, Column: 5, + Name: "TrailingCommaEOF", + Input: "a,b,c,", + Output: [][]string{{"a", "b", "c", ""}}, }, { - Name: "BadTrailingCommaEOL", - Input: "a,b,c,\n", - Error: "extra delimiter at end of line", Line: 1, Column: 5, + Name: "TrailingCommaEOL", + Input: "a,b,c,\n", + Output: [][]string{{"a", "b", "c", ""}}, }, { - Name: "BadTrailingCommaSpaceEOF", + Name: "TrailingCommaSpaceEOF", TrimLeadingSpace: true, Input: "a,b,c, ", - Error: "extra delimiter at end of line", Line: 1, Column: 5, + Output: [][]string{{"a", "b", "c", ""}}, }, { - Name: "BadTrailingCommaSpaceEOL", + Name: "TrailingCommaSpaceEOL", TrimLeadingSpace: true, Input: "a,b,c, \n", - Error: "extra delimiter at end of line", Line: 1, Column: 5, + Output: [][]string{{"a", "b", "c", ""}}, }, { - Name: "BadTrailingCommaLine3", + Name: "TrailingCommaLine3", TrimLeadingSpace: true, Input: "a,b,c\nd,e,f\ng,hi,", - Error: "extra delimiter at end of line", Line: 3, Column: 4, + Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, }, { Name: "NotTrailingComma3", @@ -231,7 +231,7 @@ x,,, }, }, { - Name: "Issue 2366", + Name: "TrailingCommaIneffective1", TrailingComma: true, TrimLeadingSpace: true, Input: "a,b,\nc,d,e", @@ -241,11 +241,14 @@ x,,, }, }, { - Name: "Issue 2366a", + Name: "TrailingCommaIneffective2", TrailingComma: false, TrimLeadingSpace: true, Input: "a,b,\nc,d,e", - Error: "extra delimiter at end of line", + Output: [][]string{ + {"a", "b", ""}, + {"c", "d", "e"}, + }, }, } -- 2.48.1