From: Mateusz Poliwczak Date: Fri, 23 Jan 2026 08:09:49 +0000 (+0100) Subject: go/scanner: add End method to Scanner X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3c924059e66427eface8d357adcf1566e3be847a;p=gostls13.git go/scanner: add End method to Scanner Fixes #74958 Change-Id: I50ef64ae9ae6a762a7aada3d29914bae6a6a6964 Reviewed-on: https://go-review.googlesource.com/c/go/+/738681 Reviewed-by: Dmitri Shuralyov Reviewed-by: Alan Donovan LUCI-TryBot-Result: Go LUCI --- diff --git a/api/next/74958.txt b/api/next/74958.txt new file mode 100644 index 0000000000..964120ed6f --- /dev/null +++ b/api/next/74958.txt @@ -0,0 +1 @@ +pkg go/scanner, method (*Scanner) End() token.Pos #74958 diff --git a/doc/next/6-stdlib/99-minor/go/scanner/74958.md b/doc/next/6-stdlib/99-minor/go/scanner/74958.md new file mode 100644 index 0000000000..5d97893482 --- /dev/null +++ b/doc/next/6-stdlib/99-minor/go/scanner/74958.md @@ -0,0 +1 @@ +The scanner now allows retrieving the end position of a token via the new [Scanner.End] method. diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go index f8610640c0..3bacd510e2 100644 --- a/src/go/scanner/scanner.go +++ b/src/go/scanner/scanner.go @@ -44,6 +44,9 @@ type Scanner struct { nlPos token.Pos // position of newline in preceding comment stringEnd token.Pos // end position; defined only for STRING tokens + endPosValid bool + endPos token.Pos // overrides the offset as the default end position + // public state - ok to modify ErrorCount int // number of errors encountered } @@ -154,7 +157,9 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode err: err, mode: mode, - ch: ' ', + ch: ' ', + endPosValid: true, + endPos: token.NoPos, } s.next() @@ -777,6 +782,21 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok return tok0 } +// End returns the position immediately after the last scanned token. 
+// If [Scanner.Scan] has not been called yet, End returns [token.NoPos]. +func (s *Scanner) End() token.Pos { + // Handles special cases: + // - Makes sure we return [token.NoPos], even when [Scanner.Init] has consumed a BOM. + // - When the previous token was a synthetic [token.SEMICOLON] inside a multi-line + // comment, we make sure End returns its ending position (i.e. prevPos+len("\n")). + if s.endPosValid { + return s.endPos + } + + // Normal case: s.file.Pos(s.offset) represents the end of the token + return s.file.Pos(s.offset) +} + // Scan scans the next token and returns the token position, the token, // and its literal string if applicable. The source end is indicated by // [token.EOF]. @@ -809,10 +829,13 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok // and thus relative to the file set. func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { scanAgain: + s.endPosValid = false if s.nlPos.IsValid() { // Return artificial ';' token after /*...*/ comment // containing newline, at position of first newline. 
pos, tok, lit = s.nlPos, token.SEMICOLON, "\n" + s.endPos = pos + 1 + s.endPosValid = true s.nlPos = token.NoPos return } diff --git a/src/go/scanner/scanner_test.go b/src/go/scanner/scanner_test.go index ae2cc63392..118bcdef0c 100644 --- a/src/go/scanner/scanner_test.go +++ b/src/go/scanner/scanner_test.go @@ -1179,3 +1179,167 @@ func TestScanReuseSemiInNewlineComment(t *testing.T) { t.Fatalf("tok = %v; want = token.IDENT", tok) } } + +func TestScannerEnd(t *testing.T) { + type tok struct { + tok token.Token + start token.Pos + end token.Pos + } + + cases := []struct { + name string + src string + end []tok + }{ + { + name: "operators", + src: "+ - / >> == =", + end: []tok{ + {token.ADD, 1, 2}, + {token.SUB, 3, 4}, + {token.QUO, 5, 6}, + {token.SHR, 7, 9}, + {token.EQL, 10, 12}, + {token.ASSIGN, 13, 14}, + {token.EOF, 14, 14}, + }, + }, + { + name: "braces", + src: "{([])}", + end: []tok{ + {token.LBRACE, 1, 2}, + {token.LPAREN, 2, 3}, + {token.LBRACK, 3, 4}, + {token.RBRACK, 4, 5}, + {token.RPAREN, 5, 6}, + {token.RBRACE, 6, 7}, + {token.SEMICOLON, 7, 7}, + {token.EOF, 7, 7}, + }, + }, + { + name: "literals", + src: `"foo" 123 1.23 0b11`, + end: []tok{ + {token.STRING, 1, 6}, + {token.INT, 7, 10}, + {token.FLOAT, 11, 15}, + {token.INT, 16, 20}, + {token.SEMICOLON, 20, 20}, + {token.EOF, 20, 20}, + }, + }, + { + name: "missing newline at the end of file", + src: "foo", + end: []tok{ + {token.IDENT, 1, 4}, + {token.SEMICOLON, 4, 4}, + {token.EOF, 4, 4}, + }, + }, + { + name: "newline at the end of file", + src: "foo\n", + end: []tok{ + {token.IDENT, 1, 4}, + {token.SEMICOLON, 4, 5}, + {token.EOF, 5, 5}, + }, + }, + { + name: "semicolon at the end of file", + src: "foo;", + end: []tok{ + {token.IDENT, 1, 4}, + {token.SEMICOLON, 4, 5}, + {token.EOF, 5, 5}, + }, + }, + { + name: "semicolon and newline at the end of file", + src: "foo;\n", + end: []tok{ + {token.IDENT, 1, 4}, + {token.SEMICOLON, 4, 5}, + {token.EOF, 6, 6}, + }, + }, + { + name: "newline in comment 
acting as semicolon", + src: "foo /*\n*/ bar", + end: []tok{ + {token.IDENT, 1, 4}, + {token.COMMENT, 5, 10}, + {token.SEMICOLON, 7, 8}, + {token.IDENT, 11, 14}, + {token.SEMICOLON, 14, 14}, + {token.EOF, 14, 14}, + }, + }, + { + name: "BOM", + src: "\uFEFFfoo", + end: []tok{ + {token.IDENT, 4, 7}, + {token.SEMICOLON, 7, 7}, + {token.EOF, 7, 7}, + }, + }, + } + + for _, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + fset := token.NewFileSet() + + var s Scanner + errorHandler := func(_ token.Position, msg string) { t.Fatal(msg) } + s.Init(fset.AddFile("test.go", -1, len(tt.src)), []byte(tt.src), errorHandler, ScanComments) + + if end := s.End(); end != token.NoPos { + t.Errorf("after init: s.End() = %v; want token.NoPos", end) + } + + var got []tok + for { + pos, tokTyp, _ := s.Scan() + got = append(got, tok{tokTyp, pos, s.End()}) + if tokTyp == token.EOF { + break + } + } + + if !slices.Equal(got, tt.end) { + t.Fatalf("input %q: got = %v; want = %v", tt.src, got, tt.end) + } + }) + } +} + +func TestScannerEndReuse(t *testing.T) { + fset := token.NewFileSet() + + const src = "identifier /*a\nb*/ + other" + var s Scanner + s.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), func(pos token.Position, msg string) { + t.Fatal(msg) + }, ScanComments) + + s.Scan() // IDENT(identifier) + s.Scan() // COMMENT(/*a\nb*/) + + _, tok, _ := s.Scan() // SEMICOLON + if tok != token.SEMICOLON { + t.Fatalf("tok = %v; want = token.SEMICOLON", tok) + } + + s.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), func(pos token.Position, msg string) { + t.Fatal(msg) + }, ScanComments) + + if end := s.End(); end != token.NoPos { + t.Errorf("s.End() = %v; want token.NoPos", end) + } +}