From 855986d54f7831fa45f3b30cb9732d0d5758fc88 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Tue, 16 Feb 2010 17:39:44 -0800 Subject: [PATCH] go/scanner: comply with spec changes (do not allow NUL chars) and complain about illegal UTF-8 code sequences R=rsc CC=golang-dev https://golang.org/cl/209043 --- src/pkg/go/scanner/scanner.go | 5 +++++ src/pkg/go/scanner/scanner_test.go | 35 ++++++++++++++++-------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 7a21205a95..b2e120179d 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -48,12 +48,17 @@ func (S *Scanner) next() { S.pos.Column++ r, w := int(S.src[S.offset]), 1 switch { + case r == 0: + S.error(S.pos, "illegal character NUL") case r == '\n': S.pos.Line++ S.pos.Column = 0 case r >= 0x80: // not ASCII r, w = utf8.DecodeRune(S.src[S.offset:]) + if r == utf8.RuneError && w == 1 { + S.error(S.pos, "illegal UTF-8 encoding") + } } S.offset += w S.ch = r diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 56091a39f8..762252488a 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -551,7 +551,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { } -func checkError(t *testing.T, src string, tok token.Token, err string) { +func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { var s Scanner var h errorCollector s.Init("", strings.Bytes(src), &h, ScanComments) @@ -573,8 +573,8 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { if h.msg != err { t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) } - if h.pos.Offset != 0 { - t.Errorf("%q: got offset %d, expected 0", src, h.pos.Offset) + if h.pos.Offset != pos { + t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos) } } @@ -582,27 +582,30 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { type srcerr struct { src string tok token.Token + pos int err string } var errors = []srcerr{ - srcerr{"\"\"", token.STRING, ""}, - srcerr{"\"", token.STRING, "string not terminated"}, - srcerr{"/**/", token.COMMENT, ""}, - srcerr{"/*", token.COMMENT, "comment not terminated"}, - srcerr{"//\n", token.COMMENT, ""}, - srcerr{"//", token.COMMENT, "comment not terminated"}, - srcerr{"077", token.INT, ""}, - srcerr{"078.", token.FLOAT, ""}, - srcerr{"07801234567.", token.FLOAT, ""}, - srcerr{"078e0", token.FLOAT, ""}, - srcerr{"078", token.INT, "illegal octal number"}, - srcerr{"07800000009", token.INT, "illegal octal number"}, + srcerr{"\"\"", token.STRING, 0, ""}, + srcerr{"\"", token.STRING, 0, "string not terminated"}, + srcerr{"/**/", token.COMMENT, 0, ""}, + srcerr{"/*", token.COMMENT, 0, "comment not terminated"}, + srcerr{"//\n", token.COMMENT, 0, ""}, + srcerr{"//", token.COMMENT, 0, "comment not terminated"}, + srcerr{"077", token.INT, 0, ""}, + srcerr{"078.", token.FLOAT, 0, ""}, + srcerr{"07801234567.", token.FLOAT, 0, ""}, + srcerr{"078e0", token.FLOAT, 0, ""}, + srcerr{"078", token.INT, 0, "illegal octal number"}, + srcerr{"07800000009", token.INT, 0, "illegal octal number"}, + srcerr{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, + srcerr{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, } func TestScanErrors(t *testing.T) { for _, e := range errors { - checkError(t, e.src, e.tok, e.err) + checkError(t, e.src, e.tok, e.pos, e.err) } } -- 2.50.0