]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal/syntax: improved scanner tests
authorRobert Griesemer <gri@golang.org>
Tue, 11 Feb 2020 05:32:04 +0000 (21:32 -0800)
committerRobert Griesemer <gri@golang.org>
Wed, 4 Mar 2020 23:08:10 +0000 (23:08 +0000)
This is one of several changes that were part of a larger rewrite
which I made in early 2019 after switching to the new number literal
syntax implementation. The purpose of the rewrite was to simplify
reading of source code (Unicode character by character) and speed up
the scanner but was never submitted for review due to other priorities.

Part 1 of 3:

This change contains improvements to the scanner tests.

Change-Id: Iecfcaef00fdeb690b0db786edbd52e828417141b
Reviewed-on: https://go-review.googlesource.com/c/go/+/221601
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
src/cmd/compile/internal/syntax/scanner_test.go

index d76231a4af541e74d9c4a19555f881e252d7193a..612c59507e642e081ec597632291708144b1f7cb 100644 (file)
@@ -12,19 +12,59 @@ import (
        "testing"
 )
 
+// errh is a default error handler for basic tests.
+func errh(line, col uint, msg string) {
+       panic(fmt.Sprintf("%d:%d: %s", line, col, msg))
+}
+
+// Don't bother with other tests if TestSmoke doesn't pass.
+func TestSmoke(t *testing.T) {
+       const src = "if (+foo\t+=..123/***/4.2_0e-0i'a'`raw`\"string\" ;//$"
+       tokens := []token{_If, _Lparen, _Operator, _Name, _AssignOp, _Dot, _Literal, _Literal, _Literal, _Literal, _Literal, _Semi, _EOF}
+
+       var got scanner
+       got.init(strings.NewReader(src), errh, 0)
+       for _, want := range tokens {
+               got.next()
+               if got.tok != want {
+                       t.Errorf("%d:%d: got %s; want %s", got.line, got.col, got.tok, want)
+                       continue
+               }
+       }
+}
+
+// Once TestSmoke passes, run TestTokens next.
+func TestTokens(t *testing.T) {
+       var got scanner
+       for _, want := range sampleTokens {
+               got.init(strings.NewReader(want.src), func(line, col uint, msg string) {
+                       t.Errorf("%s:%d:%d: %s", want.src, line, col, msg)
+               }, 0)
+               got.next()
+               if got.tok != want.tok {
+                       t.Errorf("%s: got %s; want %s", want.src, got.tok, want.tok)
+                       continue
+               }
+               if (got.tok == _Name || got.tok == _Literal) && got.lit != want.src {
+                       t.Errorf("%s: got %q; want %q", want.src, got.lit, want.src)
+               }
+       }
+}
+
 func TestScanner(t *testing.T) {
        if testing.Short() {
                t.Skip("skipping test in short mode")
        }
 
-       src, err := os.Open("parser.go")
+       filename := *src_ // can be changed via -src flag
+       src, err := os.Open(filename)
        if err != nil {
                t.Fatal(err)
        }
        defer src.Close()
 
        var s scanner
-       s.init(src, nil, 0)
+       s.init(src, errh, 0)
        for {
                s.next()
                if s.tok == _EOF {
@@ -34,64 +74,66 @@ func TestScanner(t *testing.T) {
                        continue
                }
                switch s.tok {
-               case _Name:
-                       fmt.Println(s.line, s.tok, "=>", s.lit)
+               case _Name, _Literal:
+                       fmt.Printf("%s:%d:%d: %s => %s\n", filename, s.line, s.col, s.tok, s.lit)
                case _Operator:
-                       fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
+                       fmt.Printf("%s:%d:%d: %s => %s (prec = %d)\n", filename, s.line, s.col, s.tok, s.op, s.prec)
                default:
-                       fmt.Println(s.line, s.tok)
+                       fmt.Printf("%s:%d:%d: %s\n", filename, s.line, s.col, s.tok)
                }
        }
 }
 
-func TestTokens(t *testing.T) {
+func TestEmbeddedTokens(t *testing.T) {
        // make source
        var buf bytes.Buffer
        for i, s := range sampleTokens {
-               buf.WriteString("\t\t\t\t"[:i&3])                          // leading indentation
-               buf.WriteString(s.src)                                     // token
-               buf.WriteString("        "[:i&7])                          // trailing spaces
-               fmt.Fprintf(&buf, "/*line foo:%d */ // bar\n", i+linebase) // comments (don't crash w/o directive handler)
+               buf.WriteString("\t\t\t\t"[:i&3])                            // leading indentation
+               buf.WriteString(s.src)                                       // token
+               buf.WriteString("        "[:i&7])                            // trailing spaces
+               buf.WriteString(fmt.Sprintf("/*line foo:%d */ // bar\n", i)) // comments + newline (don't crash w/o directive handler)
        }
 
        // scan source
        var got scanner
+       var src string
        got.init(&buf, func(line, col uint, msg string) {
-               t.Fatalf("%d:%d: %s", line, col, msg)
+               t.Fatalf("%s:%d:%d: %s", src, line, col, msg)
        }, 0)
        got.next()
        for i, want := range sampleTokens {
+               src = want.src
                nlsemi := false
 
-               if got.line != uint(i+linebase) {
-                       t.Errorf("got line %d; want %d", got.line, i+linebase)
+               if got.line-linebase != uint(i) {
+                       t.Errorf("%s: got line %d; want %d", src, got.line-linebase, i)
                }
 
                if got.tok != want.tok {
-                       t.Errorf("got tok = %s; want %s", got.tok, want.tok)
+                       t.Errorf("%s: got tok %s; want %s", src, got.tok, want.tok)
                        continue
                }
 
                switch want.tok {
                case _Semi:
                        if got.lit != "semicolon" {
-                               t.Errorf("got %s; want semicolon", got.lit)
+                               t.Errorf("%s: got %s; want semicolon", src, got.lit)
                        }
 
                case _Name, _Literal:
                        if got.lit != want.src {
-                               t.Errorf("got lit = %q; want %q", got.lit, want.src)
+                               t.Errorf("%s: got lit %q; want %q", src, got.lit, want.src)
                                continue
                        }
                        nlsemi = true
 
                case _Operator, _AssignOp, _IncOp:
                        if got.op != want.op {
-                               t.Errorf("got op = %s; want %s", got.op, want.op)
+                               t.Errorf("%s: got op %s; want %s", src, got.op, want.op)
                                continue
                        }
                        if got.prec != want.prec {
-                               t.Errorf("got prec = %d; want %d", got.prec, want.prec)
+                               t.Errorf("%s: got prec %d; want %d", src, got.prec, want.prec)
                                continue
                        }
                        nlsemi = want.tok == _IncOp
@@ -103,11 +145,11 @@ func TestTokens(t *testing.T) {
                if nlsemi {
                        got.next()
                        if got.tok != _Semi {
-                               t.Errorf("got tok = %s; want ;", got.tok)
+                               t.Errorf("%s: got tok %s; want ;", src, got.tok)
                                continue
                        }
                        if got.lit != "newline" {
-                               t.Errorf("got %s; want newline", got.lit)
+                               t.Errorf("%s: got %s; want newline", src, got.lit)
                        }
                }
 
@@ -299,7 +341,7 @@ func TestComments(t *testing.T) {
                {"//", comment{0, 0, "//"}},
 
                /*-style comments */
-               {"/* regular comment */", comment{0, 0, "/* regular comment */"}},
+               {"123/* regular comment */", comment{0, 3, "/* regular comment */"}},
                {"package p /* regular comment", comment{0, 0, ""}},
                {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
                {"\n\n/**/", comment{2, 0, "/**/"}},
@@ -307,17 +349,16 @@ func TestComments(t *testing.T) {
        } {
                var s scanner
                var got comment
-               s.init(strings.NewReader(test.src),
-                       func(line, col uint, msg string) {
-                               if msg[0] != '/' {
-                                       // error
-                                       if msg != "comment not terminated" {
-                                               t.Errorf("%q: %s", test.src, msg)
-                                       }
-                                       return
+               s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
+                       if msg[0] != '/' {
+                               // error
+                               if msg != "comment not terminated" {
+                                       t.Errorf("%q: %s", test.src, msg)
                                }
-                               got = comment{line - linebase, col - colbase, msg} // keep last one
-                       }, comments)
+                               return
+                       }
+                       got = comment{line - linebase, col - colbase, msg} // keep last one
+               }, comments)
 
                for {
                        s.next()
@@ -542,7 +583,7 @@ func TestNumbers(t *testing.T) {
 
 func TestScanErrors(t *testing.T) {
        for _, test := range []struct {
-               src, msg  string
+               src, err  string
                line, col uint // 0-based
        }{
                // Note: Positions for lexical errors are the earliest position
@@ -582,7 +623,7 @@ func TestScanErrors(t *testing.T) {
                {`'xx`, "invalid character literal (missing closing ')", 0, 0},
                {`'xx'`, "invalid character literal (more than one character)", 0, 0},
 
-               {"\"\n", "newline in string", 0, 1},
+               {"\n   \"foo\n", "newline in string", 1, 7},
                {`"`, "string not terminated", 0, 0},
                {`"foo`, "string not terminated", 0, 0},
                {"`", "string not terminated", 0, 0},
@@ -607,27 +648,19 @@ func TestScanErrors(t *testing.T) {
                {`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
                {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18},
 
+               {"0b.0", "invalid radix point in binary literal", 0, 2},
+               {"0x.p0\n", "hexadecimal literal has no digits", 0, 3},
+
                // former problem cases
                {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
        } {
                var s scanner
-               nerrors := 0
-               s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
-                       nerrors++
-                       // only check the first error
-                       if nerrors == 1 {
-                               if msg != test.msg {
-                                       t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
-                               }
-                               if line != test.line+linebase {
-                                       t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
-                               }
-                               if col != test.col+colbase {
-                                       t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
-                               }
-                       } else if nerrors > 1 {
-                               // TODO(gri) make this use position info
-                               t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
+               var line, col uint
+               var err string
+               s.init(strings.NewReader(test.src), func(l, c uint, msg string) {
+                       if err == "" {
+                               line, col = l-linebase, c-colbase
+                               err = msg
                        }
                }, 0)
 
@@ -638,8 +671,18 @@ func TestScanErrors(t *testing.T) {
                        }
                }
 
-               if nerrors == 0 {
-                       t.Errorf("%q: got no error; want %q", test.src, test.msg)
+               if err != "" {
+                       if err != test.err {
+                               t.Errorf("%q: got err = %q; want %q", test.src, err, test.err)
+                       }
+                       if line != test.line {
+                               t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
+                       }
+                       if col != test.col {
+                               t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
+                       }
+               } else {
+                       t.Errorf("%q: got no error; want %q", test.src, test.err)
                }
        }
 }
@@ -648,7 +691,7 @@ func TestIssue21938(t *testing.T) {
        s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
 
        var got scanner
-       got.init(strings.NewReader(s), nil, 0)
+       got.init(strings.NewReader(s), errh, 0)
        got.next()
 
        if got.tok != _Literal || got.lit != ".5" {