From fc80ce81946b009dd826e260fe4fc1fbcc19f133 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 15 Jan 2014 09:50:55 -0800 Subject: [PATCH] go/scanner: report too short escape sequences Generally improve error messages for escape sequences. R=adonovan CC=golang-codereviews https://golang.org/cl/49430046 --- src/pkg/go/scanner/scanner.go | 62 ++++++++++++++++++++---------- src/pkg/go/scanner/scanner_test.go | 30 ++++++++++++--- 2 files changed, 65 insertions(+), 27 deletions(-) diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 073bebd36d..25588ba3b0 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -358,60 +358,77 @@ exit: return tok, string(s.src[offs:s.offset]) } -func (s *Scanner) scanEscape(quote rune) { +// scanEscape parses an escape sequence where rune is the accepted +// escaped quote. In case of a syntax error, it stops at the offending +// character (without consuming it) and returns false. Otherwise +// it returns true. +func (s *Scanner) scanEscape(quote rune) bool { offs := s.offset - var i, base, max uint32 + var n int + var base, max uint32 switch s.ch { case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: s.next() - return + return true case '0', '1', '2', '3', '4', '5', '6', '7': - i, base, max = 3, 8, 255 + n, base, max = 3, 8, 255 case 'x': s.next() - i, base, max = 2, 16, 255 + n, base, max = 2, 16, 255 case 'u': s.next() - i, base, max = 4, 16, unicode.MaxRune + n, base, max = 4, 16, unicode.MaxRune case 'U': s.next() - i, base, max = 8, 16, unicode.MaxRune + n, base, max = 8, 16, unicode.MaxRune default: - s.next() // always make progress - s.error(offs, "unknown escape sequence") - return + msg := "unknown escape sequence" + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(offs, msg) + return false } var x uint32 - for ; i > 0 && s.ch != quote && s.ch >= 0; i-- { + for n > 0 { d := uint32(digitVal(s.ch)) if d >= base { - s.error(s.offset, "illegal character in escape sequence") - break + msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch) + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(s.offset, msg) + return false } x = x*base + d s.next() + n-- } - // in case of an error, consume remaining chars - for ; i > 0 && s.ch != quote && s.ch >= 0; i-- { - s.next() - } + if x > max || 0xD800 <= x && x < 0xE000 { s.error(offs, "escape sequence is invalid Unicode code point") + return false } + + return true } func (s *Scanner) scanRune() string { // '\'' opening already consumed offs := s.offset - 1 + valid := true n := 0 for { ch := s.ch if ch == '\n' || ch < 0 { - s.error(offs, "rune literal not terminated") - n = 1 // avoid further errors + // only report error if we don't have one already + if valid { + s.error(offs, "rune literal not terminated") + valid = false + } break } s.next() @@ -420,11 +437,14 @@ func (s *Scanner) scanRune() string { } n++ if ch == '\\' { - s.scanEscape('\'') + if !s.scanEscape('\'') { + valid = false + } + // continue to read to closing quote } } - if n != 1 { + if valid && n != 1 { s.error(offs, "illegal rune literal") } diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index a26785ebc4..e0d0b54f68 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -641,13 +641,9 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err str } s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis) _, tok0, lit0 := s.Scan() - _, tok1, _ := s.Scan() if tok0 != tok { t.Errorf("%q: got %s, expected %s", src, tok0, tok) } - if tok1 != token.EOF { - t.Errorf("%q: got %s, expected EOF", src, tok1) - } if tok0 != token.ILLEGAL && lit0 != lit { t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) } @@ -678,12 +674,34 @@ var errors = []struct { {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, {`' '`, token.CHAR, 0, `' '`, ""}, {`''`, token.CHAR, 0, `''`, "illegal rune literal"}, + {`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"}, {`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"}, + {`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, {`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"}, - {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character in escape sequence"}, - {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character in escape sequence"}, + {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"}, + {`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"}, + {`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"}, + {`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""}, + {`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"}, + {`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""}, {`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"}, {`'`, token.CHAR, 0, `'`, "rune literal not terminated"}, + {`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"}, {"'\n", token.CHAR, 0, "'", "rune literal not terminated"}, {"'\n ", token.CHAR, 0, "'", "rune literal not terminated"}, {`""`, token.STRING, 0, `""`, ""}, -- 2.48.1