bufio: handle excessive white space in ScanWords

author Matthew Dempsky <mdempsky@google.com>

Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)

committer Rob Pike <r@golang.org>

Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)
author Matthew Dempsky <mdempsky@google.com>
Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)
committer Rob Pike <r@golang.org>
Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)
diff --git a/src/pkg/bufio/scan.go b/src/pkg/bufio/scan.go

index 715ce071e3bb8113595620067ae11ee7a8834b99..97ae1090956e65502c9f0346c60cf5d99a877262 100644 (file)
--- a/src/pkg/bufio/scan.go
+++ b/src/pkg/bufio/scan.go
@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
                         break
                 }
         }
-       if atEOF && len(data) == 0 {
-               return 0, nil, nil
-       }
         // Scan until space, marking end of word.
         for width, i := 0, start; i < len(data); i += width {
                 var r rune
@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
                 return len(data), data[start:], nil
         }
         // Request more data.
-       return 0, nil, nil
+       return start, nil, nil
  }
diff --git a/src/pkg/bufio/scan_test.go b/src/pkg/bufio/scan_test.go

index 0db7cad2047545926756ac573a6720d5e37b275b..ce49ece93a7b5ed0d58dfd717d56e38598b0b1c9 100644 (file)
--- a/src/pkg/bufio/scan_test.go
+++ b/src/pkg/bufio/scan_test.go
@@ -15,6 +15,8 @@ import (
         "unicode/utf8"
  )
  
+const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
+
  // Test white space table matches the Unicode definition.
  func TestSpace(t *testing.T) {
         for r := rune(0); r <= utf8.MaxRune; r++ {
@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
  
  // Test the line splitter, including some carriage returns but no long lines.
  func TestScanLongLines(t *testing.T) {
-       const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
         // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
         tmp := new(bytes.Buffer)
         buf := new(bytes.Buffer)
@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
                 t.Errorf("unexpected error: %v", err)
         }
  }
+
+func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
+       const word = "ipsum"
+       s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
+       scanner := NewScanner(strings.NewReader(s))
+       scanner.MaxTokenSize(smallMaxTokenSize)
+       scanner.Split(ScanWords)
+       if !scanner.Scan() {
+               t.Fatal("scan failed: %v", scanner.Err())
+       }
+       if token := scanner.Text(); token != word {
+               t.Fatal("unexpected token: %v", token)
+       }
+}
author	Matthew Dempsky <mdempsky@google.com>
	Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)
committer	Rob Pike <r@golang.org>
	Mon, 16 Jun 2014 19:59:10 +0000 (12:59 -0700)
src/pkg/bufio/scan.go		patch \| blob \| history
src/pkg/bufio/scan_test.go		patch \| blob \| history