go/scanner: report too short escape sequences

author Robert Griesemer <gri@golang.org>

Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)

committer Robert Griesemer <gri@golang.org>

Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)
author Robert Griesemer <gri@golang.org>
Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)
committer Robert Griesemer <gri@golang.org>
Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go

index 073bebd36da74a2593dfc2828193b026265686bd..25588ba3b0c0bb166dd518a2adbdb2c5df729501 100644 (file)
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -358,60 +358,77 @@ exit:
         return tok, string(s.src[offs:s.offset])
  }
  
-func (s *Scanner) scanEscape(quote rune) {
+// scanEscape parses an escape sequence where quote is the accepted
+// escaped quote. In case of a syntax error, it stops at the offending
+// character (without consuming it) and returns false. Otherwise
+// it returns true.
+func (s *Scanner) scanEscape(quote rune) bool {
         offs := s.offset
  
-       var i, base, max uint32
+       var n int
+       var base, max uint32
         switch s.ch {
         case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
                 s.next()
-               return
+               return true
         case '0', '1', '2', '3', '4', '5', '6', '7':
-               i, base, max = 3, 8, 255
+               n, base, max = 3, 8, 255
         case 'x':
                 s.next()
-               i, base, max = 2, 16, 255
+               n, base, max = 2, 16, 255
         case 'u':
                 s.next()
-               i, base, max = 4, 16, unicode.MaxRune
+               n, base, max = 4, 16, unicode.MaxRune
         case 'U':
                 s.next()
-               i, base, max = 8, 16, unicode.MaxRune
+               n, base, max = 8, 16, unicode.MaxRune
         default:
-               s.next() // always make progress
-               s.error(offs, "unknown escape sequence")
-               return
+               msg := "unknown escape sequence"
+               if s.ch < 0 {
+                       msg = "escape sequence not terminated"
+               }
+               s.error(offs, msg)
+               return false
         }
  
         var x uint32
-       for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
+       for n > 0 {
                 d := uint32(digitVal(s.ch))
                 if d >= base {
-                       s.error(s.offset, "illegal character in escape sequence")
-                       break
+                       msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
+                       if s.ch < 0 {
+                               msg = "escape sequence not terminated"
+                       }
+                       s.error(s.offset, msg)
+                       return false
                 }
                 x = x*base + d
                 s.next()
+               n--
         }
-       // in case of an error, consume remaining chars
-       for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
-               s.next()
-       }
+
         if x > max || 0xD800 <= x && x < 0xE000 {
                 s.error(offs, "escape sequence is invalid Unicode code point")
+               return false
         }
+
+       return true
  }
  
  func (s *Scanner) scanRune() string {
         // '\'' opening already consumed
         offs := s.offset - 1
  
+       valid := true
         n := 0
         for {
                 ch := s.ch
                 if ch == '\n' || ch < 0 {
-                       s.error(offs, "rune literal not terminated")
-                       n = 1 // avoid further errors
+                       // only report error if we don't have one already
+                       if valid {
+                               s.error(offs, "rune literal not terminated")
+                               valid = false
+                       }
                         break
                 }
                 s.next()
@@ -420,11 +437,14 @@ func (s *Scanner) scanRune() string {
                 }
                 n++
                 if ch == '\\' {
-                       s.scanEscape('\'')
+                       if !s.scanEscape('\'') {
+                               valid = false
+                       }
+                       // continue to read to closing quote
                 }
         }
  
-       if n != 1 {
+       if valid && n != 1 {
                 s.error(offs, "illegal rune literal")
         }
  
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go

index a26785ebc4b892380f317be12d4be60829c39e4f..e0d0b54f68e6d73d11cf7b05bbc290982613d5b3 100644 (file)
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -641,13 +641,9 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err str
         }
         s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
         _, tok0, lit0 := s.Scan()
-       _, tok1, _ := s.Scan()
         if tok0 != tok {
                 t.Errorf("%q: got %s, expected %s", src, tok0, tok)
         }
-       if tok1 != token.EOF {
-               t.Errorf("%q: got %s, expected EOF", src, tok1)
-       }
         if tok0 != token.ILLEGAL && lit0 != lit {
                 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
         }
@@ -678,12 +674,34 @@ var errors = []struct {
         {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
         {`' '`, token.CHAR, 0, `' '`, ""},
         {`''`, token.CHAR, 0, `''`, "illegal rune literal"},
+       {`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
         {`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
+       {`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
         {`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
-       {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character in escape sequence"},
-       {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character in escape sequence"},
+       {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
+       {`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
+       {`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
+       {`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
+       {`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
+       {`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
+       {`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
         {`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
         {`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
+       {`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
         {"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
         {"'\n   ", token.CHAR, 0, "'", "rune literal not terminated"},
         {`""`, token.STRING, 0, `""`, ""},
author	Robert Griesemer <gri@golang.org>
	Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)
committer	Robert Griesemer <gri@golang.org>
	Wed, 15 Jan 2014 17:50:55 +0000 (09:50 -0800)
src/pkg/go/scanner/scanner.go		patch \| blob \| history
src/pkg/go/scanner/scanner_test.go		patch \| blob \| history