bufio: fix scanning with a final empty token.

author Rob Pike <r@golang.org>

Thu, 24 Sep 2015 20:10:00 +0000 (13:10 -0700)

committer Rob Pike <r@golang.org>

Fri, 25 Sep 2015 21:46:13 +0000 (21:46 +0000)
author Rob Pike <r@golang.org>
Thu, 24 Sep 2015 20:10:00 +0000 (13:10 -0700)
committer Rob Pike <r@golang.org>
Fri, 25 Sep 2015 21:46:13 +0000 (21:46 +0000)
diff --git a/src/bufio/example_test.go b/src/bufio/example_test.go

index 3da914142194895ea37e660c621d3835fb251a86..4666e6d985561c565ba95c016cda2149cc00373a 100644 (file)
--- a/src/bufio/example_test.go
+++ b/src/bufio/example_test.go
@@ -80,3 +80,32 @@ func ExampleScanner_custom() {
         // 5678
         // Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range
  }
+
+// Use a Scanner with a custom split function to parse a comma-separated
+// list with an empty final value.
+func ExampleScanner_emptyFinalToken() {
+       // Comma-separated list; last entry is empty.
+       const input = "1,2,3,4,"
+       scanner := bufio.NewScanner(strings.NewReader(input))
+       // Define a split function that separates on commas.
+       onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+               for i := 0; i < len(data); i++ {
+                       if data[i] == ',' {
+                               return i + 1, data[:i], nil
+                       }
+               }
+               // There is one final token to be delivered, which may be the empty string.
+               // Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this
+               // but does not trigger an error to be returned from Scan itself.
+               return 0, data, bufio.ErrFinalToken
+       }
+       scanner.Split(onComma)
+       // Scan.
+       for scanner.Scan() {
+               fmt.Printf("%q ", scanner.Text())
+       }
+       if err := scanner.Err(); err != nil {
+               fmt.Fprintln(os.Stderr, "reading input:", err)
+       }
+       // Output: "1" "2" "3" "4" ""
+}
diff --git a/src/bufio/scan.go b/src/bufio/scan.go

index 0ec584b0276fe13d1ebace1c0061dc4ec6aefb8e..27a0f0045955e301ce698723e6a89c93c135c9e9 100644 (file)
--- a/src/bufio/scan.go
+++ b/src/bufio/scan.go
@@ -38,6 +38,7 @@ type Scanner struct {
         err          error     // Sticky error.
         empties      int       // Count of successive empty tokens.
         scanCalled   bool      // Scan has been called; buffer is in use.
+       done         bool      // Scan has finished.
  }
  
  // SplitFunc is the signature of the split function used to tokenize the
@@ -106,6 +107,16 @@ func (s *Scanner) Text() string {
         return string(s.token)
  }
  
+// ErrFinalToken is a special sentinel error value. It is intended to be
+// returned by a Split function to indicate that the token being delivered
+// with the error is the last token and scanning should stop after this one.
+// After ErrFinalToken is received by Scan, scanning stops with no error.
+// The value is useful to stop processing early or when it is necessary to
+// deliver a final empty token. One could achieve the same behavior
+// with a custom error value but providing one here is tidier.
+// See the emptyFinalToken example for a use of this value.
+var ErrFinalToken = errors.New("final token")
+
  // Scan advances the Scanner to the next token, which will then be
  // available through the Bytes or Text method. It returns false when the
  // scan stops, either by reaching the end of the input or an error.
@@ -115,6 +126,9 @@ func (s *Scanner) Text() string {
  // Scan panics if the split function returns 100 empty tokens without
  // advancing the input. This is a common error mode for scanners.
  func (s *Scanner) Scan() bool {
+       if s.done {
+               return false
+       }
         s.scanCalled = true
         // Loop until we have a token.
         for {
@@ -124,6 +138,11 @@ func (s *Scanner) Scan() bool {
                 if s.end > s.start || s.err != nil {
                         advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
                         if err != nil {
+                               if err == ErrFinalToken {
+                                       s.token = token
+                                       s.done = true
+                                       return true
+                               }
                                 s.setErr(err)
                                 return false
                         }
diff --git a/src/bufio/scan_test.go b/src/bufio/scan_test.go

index ac65de9c44d198d1c04f15f8013725508bdc5bfe..07b1a56dc0a4654a294b5085c5a6a6aad15e5831 100644 (file)
--- a/src/bufio/scan_test.go
+++ b/src/bufio/scan_test.go
@@ -429,33 +429,37 @@ func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error)
                         return i + 1, data[:i], nil
                 }
         }
-       if !atEOF {
-               return 0, nil, nil
-       }
-       return 0, data, nil
+       return 0, data, ErrFinalToken
  }
  
-func TestEmptyTokens(t *testing.T) {
-       s := NewScanner(strings.NewReader("1,2,3,"))
-       values := []string{"1", "2", "3", ""}
+func testEmptyTokens(t *testing.T, text string, values []string) {
+       s := NewScanner(strings.NewReader(text))
         s.Split(commaSplit)
         var i int
-       for i = 0; i < len(values); i++ {
-               if !s.Scan() {
-                       break
+       for i = 0; s.Scan(); i++ {
+               if i >= len(values) {
+                       t.Fatalf("got %d fields, expected %d", i+1, len(values))
                 }
                 if s.Text() != values[i] {
                         t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
                 }
         }
         if i != len(values) {
-               t.Errorf("got %d fields, expected %d", i, len(values))
+               t.Fatalf("got %d fields, expected %d", i, len(values))
         }
         if err := s.Err(); err != nil {
                 t.Fatal(err)
         }
  }
  
+func TestEmptyTokens(t *testing.T) {
+       testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
+}
+
+func TestWithNoEmptyTokens(t *testing.T) {
+       testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
+}
+
  func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
         if len(data) > 0 {
                 return 1, data[:1], nil
author	Rob Pike <r@golang.org>
	Thu, 24 Sep 2015 20:10:00 +0000 (13:10 -0700)
committer	Rob Pike <r@golang.org>
	Fri, 25 Sep 2015 21:46:13 +0000 (21:46 +0000)
src/bufio/example_test.go		patch | blob | history
src/bufio/scan.go		patch | blob | history
src/bufio/scan_test.go		patch | blob | history