Cypherpunks repositories - gostls13.git/commitdiff
go/scanner: add End method to Scanner
author Mateusz Poliwczak <mpoliwczak34@gmail.com>
Fri, 23 Jan 2026 08:09:49 +0000 (09:09 +0100)
committer Mateusz Poliwczak <mpoliwczak34@gmail.com>
Thu, 29 Jan 2026 06:44:07 +0000 (22:44 -0800)
Fixes #74958

Change-Id: I50ef64ae9ae6a762a7aada3d29914bae6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/738681
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

api/next/74958.txt [new file with mode: 0644]
doc/next/6-stdlib/99-minor/go/scanner/74958.md [new file with mode: 0644]
src/go/scanner/scanner.go
src/go/scanner/scanner_test.go

diff --git a/api/next/74958.txt b/api/next/74958.txt
new file mode 100644 (file)
index 0000000..964120e
--- /dev/null
@@ -0,0 +1 @@
+pkg go/scanner, method (*Scanner) End() token.Pos #74958
diff --git a/doc/next/6-stdlib/99-minor/go/scanner/74958.md b/doc/next/6-stdlib/99-minor/go/scanner/74958.md
new file mode 100644 (file)
index 0000000..5d97893
--- /dev/null
@@ -0,0 +1 @@
+The scanner now allows retrieving the end position of a token via the new [Scanner.End] method.
diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go
index f8610640c055482e703a3b1dc9dbd326dd9994d6..3bacd510e2ed7e33f8f9613e918275c97068f9c1 100644 (file)
@@ -44,6 +44,9 @@ type Scanner struct {
        nlPos      token.Pos // position of newline in preceding comment
        stringEnd  token.Pos // end position; defined only for STRING tokens
 
+       endPosValid bool
+       endPos      token.Pos // overrides the offset as the default end position
+
        // public state - ok to modify
        ErrorCount int // number of errors encountered
 }
@@ -154,7 +157,9 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode
                err:  err,
                mode: mode,
 
-               ch: ' ',
+               ch:          ' ',
+               endPosValid: true,
+               endPos:      token.NoPos,
        }
 
        s.next()
@@ -777,6 +782,21 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
        return tok0
 }
 
+// End returns the position immediately after the last scanned token.
+// If [Scanner.Scan] has not been called yet, End returns [token.NoPos].
+func (s *Scanner) End() token.Pos {
+       // Special cases, in which s.endPos overrides the scanner offset:
+       // - After [Scanner.Init], End returns [token.NoPos], even when Init has consumed a BOM.
+       // - After a synthetic [token.SEMICOLON] produced for a newline inside a multi-line
+       //   comment, End returns that token's end position (i.e. its pos+len("\n")).
+       if s.endPosValid {
+               return s.endPos
+       }
+
+       // Normal case: s.file.Pos(s.offset) is the end of the last scanned token.
+       return s.file.Pos(s.offset)
+}
+
 // Scan scans the next token and returns the token position, the token,
 // and its literal string if applicable. The source end is indicated by
 // [token.EOF].
@@ -809,10 +829,13 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
 // and thus relative to the file set.
 func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
 scanAgain:
+       s.endPosValid = false
        if s.nlPos.IsValid() {
                // Return artificial ';' token after /*...*/ comment
                // containing newline, at position of first newline.
                pos, tok, lit = s.nlPos, token.SEMICOLON, "\n"
+               s.endPos = pos + 1
+               s.endPosValid = true
                s.nlPos = token.NoPos
                return
        }
diff --git a/src/go/scanner/scanner_test.go b/src/go/scanner/scanner_test.go
index ae2cc633923d5b6f23f536f6dbc5411aa164339c..118bcdef0c4d9a06a512bb9219efe18bc10d37d1 100644 (file)
@@ -1179,3 +1179,167 @@ func TestScanReuseSemiInNewlineComment(t *testing.T) {
                t.Fatalf("tok = %v; want = token.IDENT", tok)
        }
 }
+
+func TestScannerEnd(t *testing.T) {
+       type tok struct {
+               tok   token.Token
+               start token.Pos // position returned by Scan
+               end   token.Pos // value of s.End() right after that Scan
+       }
+
+       cases := []struct {
+               name string
+               src  string
+               end  []tok // expected tokens, in scan order, up to and including token.EOF
+       }{
+               {
+                       name: "operators",
+                       src:  "+ - / >> == =",
+                       end: []tok{
+                               {token.ADD, 1, 2},
+                               {token.SUB, 3, 4},
+                               {token.QUO, 5, 6},
+                               {token.SHR, 7, 9},
+                               {token.EQL, 10, 12},
+                               {token.ASSIGN, 13, 14},
+                               {token.EOF, 14, 14},
+                       },
+               },
+               {
+                       name: "braces",
+                       src:  "{([])}",
+                       end: []tok{
+                               {token.LBRACE, 1, 2},
+                               {token.LPAREN, 2, 3},
+                               {token.LBRACK, 3, 4},
+                               {token.RBRACK, 4, 5},
+                               {token.RPAREN, 5, 6},
+                               {token.RBRACE, 6, 7},
+                               {token.SEMICOLON, 7, 7}, // not present in src: start == end
+                               {token.EOF, 7, 7},
+                       },
+               },
+               {
+                       name: "literals",
+                       src:  `"foo" 123 1.23 0b11`,
+                       end: []tok{
+                               {token.STRING, 1, 6},
+                               {token.INT, 7, 10},
+                               {token.FLOAT, 11, 15},
+                               {token.INT, 16, 20},
+                               {token.SEMICOLON, 20, 20},
+                               {token.EOF, 20, 20},
+                       },
+               },
+               {
+                       name: "missing newline at the end of file",
+                       src:  "foo",
+                       end: []tok{
+                               {token.IDENT, 1, 4},
+                               {token.SEMICOLON, 4, 4}, // not present in src: start == end
+                               {token.EOF, 4, 4},
+                       },
+               },
+               {
+                       name: "newline at the end of file",
+                       src:  "foo\n",
+                       end: []tok{
+                               {token.IDENT, 1, 4},
+                               {token.SEMICOLON, 4, 5}, // spans the "\n" in src
+                               {token.EOF, 5, 5},
+                       },
+               },
+               {
+                       name: "semicolon at the end of file",
+                       src:  "foo;",
+                       end: []tok{
+                               {token.IDENT, 1, 4},
+                               {token.SEMICOLON, 4, 5},
+                               {token.EOF, 5, 5},
+                       },
+               },
+               {
+                       name: "semicolon and newline at the end of file",
+                       src:  "foo;\n",
+                       end: []tok{
+                               {token.IDENT, 1, 4},
+                               {token.SEMICOLON, 4, 5},
+                               {token.EOF, 6, 6},
+                       },
+               },
+               {
+                       name: "newline in comment acting as semicolon",
+                       src:  "foo /*\n*/ bar",
+                       end: []tok{
+                               {token.IDENT, 1, 4},
+                               {token.COMMENT, 5, 10},
+                               {token.SEMICOLON, 7, 8}, // at the newline inside the preceding comment
+                               {token.IDENT, 11, 14},
+                               {token.SEMICOLON, 14, 14},
+                               {token.EOF, 14, 14},
+                       },
+               },
+               {
+                       name: "BOM",
+                       src:  "\uFEFFfoo",
+                       end: []tok{
+                               {token.IDENT, 4, 7}, // foo starts after the 3-byte BOM
+                               {token.SEMICOLON, 7, 7},
+                               {token.EOF, 7, 7},
+                       },
+               },
+       }
+
+       for _, tt := range cases {
+               t.Run(tt.name, func(t *testing.T) {
+                       fset := token.NewFileSet()
+
+                       var s Scanner
+                       errorHandler := func(_ token.Position, msg string) { t.Fatal(msg) }
+                       s.Init(fset.AddFile("test.go", -1, len(tt.src)), []byte(tt.src), errorHandler, ScanComments)
+
+                       if end := s.End(); end != token.NoPos { // Scan has not been called yet
+                               t.Errorf("after init: s.End() = %v; want token.NoPos", end)
+                       }
+
+                       var got []tok
+                       for {
+                               pos, tokTyp, _ := s.Scan()
+                               got = append(got, tok{tokTyp, pos, s.End()}) // record End right after each Scan
+                               if tokTyp == token.EOF {
+                                       break
+                               }
+                       }
+
+                       if !slices.Equal(got, tt.end) {
+                               t.Fatalf("input %q: got = %v; want = %v", tt.src, got, tt.end)
+                       }
+               })
+       }
+}
+
+func TestScannerEndReuse(t *testing.T) {
+       fset := token.NewFileSet()
+
+       const src = "identifier /*a\nb*/ + other"
+       var s Scanner
+       s.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), func(pos token.Position, msg string) {
+               t.Fatal(msg)
+       }, ScanComments)
+
+       s.Scan() // IDENT(identifier)
+       s.Scan() // COMMENT(/*a\nb*/)
+
+       _, tok, _ := s.Scan() // SEMICOLON (synthetic, for the newline inside the comment)
+       if tok != token.SEMICOLON {
+               t.Fatalf("tok = %v; want = token.SEMICOLON", tok)
+       }
+
+       s.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), func(pos token.Position, msg string) {
+               t.Fatal(msg)
+       }, ScanComments)
+
+       if end := s.End(); end != token.NoPos { // re-Init must discard the end position of the previous scan
+               t.Errorf("s.End() = %v; want token.NoPos", end)
+       }
+}