go/ast: fix BasicLit.End position for raw strings containing \r

author Alan Donovan <adonovan@google.com>

Wed, 12 Nov 2025 22:13:40 +0000 (17:13 -0500)

committer Alan Donovan <adonovan@google.com>

Mon, 17 Nov 2025 18:52:48 +0000 (10:52 -0800)
author Alan Donovan <adonovan@google.com>
Wed, 12 Nov 2025 22:13:40 +0000 (17:13 -0500)
committer Alan Donovan <adonovan@google.com>
Mon, 17 Nov 2025 18:52:48 +0000 (10:52 -0800)
diff --git a/api/next/76031.txt b/api/next/76031.txt

new file mode 100644 (file)

index 0000000..049edc7
--- /dev/null
+++ b/api/next/76031.txt
@@ -0,0 +1 @@
+pkg go/ast, type BasicLit struct, ValueEnd token.Pos #76031
diff --git a/doc/next/6-stdlib/99-minor/go/ast/76031.md b/doc/next/6-stdlib/99-minor/go/ast/76031.md

new file mode 100644 (file)

index 0000000..964872f
--- /dev/null
+++ b/doc/next/6-stdlib/99-minor/go/ast/76031.md
@@ -0,0 +1,5 @@
+The new [BasicLit.ValueEnd] field records the precise end position of
+a literal so that the [BasicLit.End] method can now always return the
+correct answer. (Previously it was computed using a heuristic that was
+incorrect for multi-line raw string literals in Windows source files,
+due to removal of carriage returns.)
diff --git a/src/go/ast/ast.go b/src/go/ast/ast.go

index a6dab5bb5171bc0a3931013a07dfd3c35fff43fe..37fc3c96662052538ed28edfc5d6dbc2fb9a120d 100644 (file)
--- a/src/go/ast/ast.go
+++ b/src/go/ast/ast.go
@@ -312,11 +312,10 @@ type (
         //
         // For raw string literals (Kind == token.STRING && Value[0] == '`'),
         // the Value field contains the string text without carriage returns (\r) that
-       // may have been present in the source. Because the end position is
-       // computed using len(Value), the position reported by [BasicLit.End] does not match the
-       // true source end position for raw string literals containing carriage returns.
+       // may have been present in the source.
         BasicLit struct {
                 ValuePos token.Pos   // literal position
+               ValueEnd token.Pos   // position immediately after the literal
                 Kind     token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING
                 Value    string      // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o`
         }
@@ -535,7 +534,15 @@ func (x *Ellipsis) End() token.Pos {
         }
         return x.Ellipsis + 3 // len("...")
  }
-func (x *BasicLit) End() token.Pos       { return token.Pos(int(x.ValuePos) + len(x.Value)) }
+func (x *BasicLit) End() token.Pos {
+       if !x.ValueEnd.IsValid() {
+               // Not from parser; use a heuristic.
+               // (Incorrect for `...` containing \r\n;
+               // see https://go.dev/issue/76031.)
+               return token.Pos(int(x.ValuePos) + len(x.Value))
+       }
+       return x.ValueEnd
+}
  func (x *FuncLit) End() token.Pos        { return x.Body.End() }
  func (x *CompositeLit) End() token.Pos   { return x.Rbrace + 1 }
  func (x *ParenExpr) End() token.Pos      { return x.Rparen + 1 }
diff --git a/src/go/ast/commentmap_test.go b/src/go/ast/commentmap_test.go

index f0faeed610a1871ace48cae0018094d556bfd5aa..0d5e8de01376697aa2783e907738b0411c4542d3 100644 (file)
--- a/src/go/ast/commentmap_test.go
+++ b/src/go/ast/commentmap_test.go
@@ -109,7 +109,7 @@ func TestCommentMap(t *testing.T) {
         }
         cmap := NewCommentMap(fset, f, f.Comments)
  
-       // very correct association of comments
+       // verify correct association of comments
         for n, list := range cmap {
                 key := fmt.Sprintf("%2d: %T", fset.Position(n.Pos()).Line, n)
                 got := ctext(list)
diff --git a/src/go/ast/example_test.go b/src/go/ast/example_test.go

index 31b32efece993a8f4f19c7137d8d17d3aac8e25a..36daa7e7e1e6460b75801be9646253d5fec7451b 100644 (file)
--- a/src/go/ast/example_test.go
+++ b/src/go/ast/example_test.go
@@ -113,31 +113,32 @@ func main() {
         //     34  .  .  .  .  .  .  .  Args: []ast.Expr (len = 1) {
         //     35  .  .  .  .  .  .  .  .  0: *ast.BasicLit {
         //     36  .  .  .  .  .  .  .  .  .  ValuePos: 4:10
-       //     37  .  .  .  .  .  .  .  .  .  Kind: STRING
-       //     38  .  .  .  .  .  .  .  .  .  Value: "\"Hello, World!\""
-       //     39  .  .  .  .  .  .  .  .  }
-       //     40  .  .  .  .  .  .  .  }
-       //     41  .  .  .  .  .  .  .  Ellipsis: -
-       //     42  .  .  .  .  .  .  .  Rparen: 4:25
-       //     43  .  .  .  .  .  .  }
-       //     44  .  .  .  .  .  }
-       //     45  .  .  .  .  }
-       //     46  .  .  .  .  Rbrace: 5:1
-       //     47  .  .  .  }
-       //     48  .  .  }
-       //     49  .  }
-       //     50  .  FileStart: 1:1
-       //     51  .  FileEnd: 5:3
-       //     52  .  Scope: *ast.Scope {
-       //     53  .  .  Objects: map[string]*ast.Object (len = 1) {
-       //     54  .  .  .  "main": *(obj @ 11)
-       //     55  .  .  }
-       //     56  .  }
-       //     57  .  Unresolved: []*ast.Ident (len = 1) {
-       //     58  .  .  0: *(obj @ 29)
-       //     59  .  }
-       //     60  .  GoVersion: ""
-       //     61  }
+       //     37  .  .  .  .  .  .  .  .  .  ValueEnd: 4:25
+       //     38  .  .  .  .  .  .  .  .  .  Kind: STRING
+       //     39  .  .  .  .  .  .  .  .  .  Value: "\"Hello, World!\""
+       //     40  .  .  .  .  .  .  .  .  }
+       //     41  .  .  .  .  .  .  .  }
+       //     42  .  .  .  .  .  .  .  Ellipsis: -
+       //     43  .  .  .  .  .  .  .  Rparen: 4:25
+       //     44  .  .  .  .  .  .  }
+       //     45  .  .  .  .  .  }
+       //     46  .  .  .  .  }
+       //     47  .  .  .  .  Rbrace: 5:1
+       //     48  .  .  .  }
+       //     49  .  .  }
+       //     50  .  }
+       //     51  .  FileStart: 1:1
+       //     52  .  FileEnd: 5:3
+       //     53  .  Scope: *ast.Scope {
+       //     54  .  .  Objects: map[string]*ast.Object (len = 1) {
+       //     55  .  .  .  "main": *(obj @ 11)
+       //     56  .  .  }
+       //     57  .  }
+       //     58  .  Unresolved: []*ast.Ident (len = 1) {
+       //     59  .  .  0: *(obj @ 29)
+       //     60  .  }
+       //     61  .  GoVersion: ""
+       //     62  }
  }
  
  func ExamplePreorder() {
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go

index 868be194c39f3906522b83b0c3e66fb47e180e4c..5f95535ed949d36ba7dd2fb6681351412f0408cc 100644 (file)
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -335,6 +335,7 @@ var depsRules = `
         < internal/gover
         < go/version
         < go/token
+       < go/internal/scannerhooks
         < go/scanner
         < go/ast
         < go/internal/typeparams;
diff --git a/src/go/internal/scannerhooks/hooks.go b/src/go/internal/scannerhooks/hooks.go

new file mode 100644 (file)

index 0000000..057261d
--- /dev/null
+++ b/src/go/internal/scannerhooks/hooks.go
@@ -0,0 +1,11 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package scannerhooks defines nonexported channels between parser and scanner.
+// Ideally this package could be eliminated by adding API to scanner.
+package scannerhooks
+
+import "go/token"
+
+var StringEnd func(scanner any) token.Pos
diff --git a/src/go/parser/parser.go b/src/go/parser/parser.go

index e725371e76825e2f525a77bffb832a0892bb79dd..e01a221968f1dbb79057fe738b9991c82665e356 100644 (file)
--- a/src/go/parser/parser.go
+++ b/src/go/parser/parser.go
@@ -28,6 +28,7 @@ import (
         "fmt"
         "go/ast"
         "go/build/constraint"
+       "go/internal/scannerhooks"
         "go/scanner"
         "go/token"
         "strings"
@@ -52,9 +53,10 @@ type parser struct {
         goVersion   string            // minimum Go version found in //go:build comment
  
         // Next token
-       pos token.Pos   // token position
-       tok token.Token // one token look-ahead
-       lit string      // token literal
+       pos       token.Pos   // token position
+       tok       token.Token // one token look-ahead
+       lit       string      // token literal
+       stringEnd token.Pos   // position immediately after token; STRING only
  
         // Error recovery
         // (used to limit the number of calls to parser.advance
@@ -163,6 +165,10 @@ func (p *parser) next0() {
                                 continue
                         }
                 } else {
+                       if p.tok == token.STRING {
+                               p.stringEnd = scannerhooks.StringEnd(&p.scanner)
+                       }
+
                         // Found a non-comment; top of file is over.
                         p.top = false
                 }
@@ -720,7 +726,7 @@ func (p *parser) parseFieldDecl() *ast.Field {
  
         var tag *ast.BasicLit
         if p.tok == token.STRING {
-               tag = &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+               tag = &ast.BasicLit{ValuePos: p.pos, ValueEnd: p.stringEnd, Kind: p.tok, Value: p.lit}
                 p.next()
         }
  
@@ -1474,7 +1480,11 @@ func (p *parser) parseOperand() ast.Expr {
                 return x
  
         case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING:
-               x := &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+               end := p.pos + token.Pos(len(p.lit))
+               if p.tok == token.STRING {
+                       end = p.stringEnd
+               }
+               x := &ast.BasicLit{ValuePos: p.pos, ValueEnd: end, Kind: p.tok, Value: p.lit}
                 p.next()
                 return x
  
@@ -2511,9 +2521,11 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
         }
  
         pos := p.pos
+       end := p.pos
         var path string
         if p.tok == token.STRING {
                 path = p.lit
+               end = p.stringEnd
                 p.next()
         } else if p.tok.IsLiteral() {
                 p.error(pos, "import path must be a string")
@@ -2528,7 +2540,7 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
         spec := &ast.ImportSpec{
                 Doc:     doc,
                 Name:    ident,
-               Path:    &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path},
+               Path:    &ast.BasicLit{ValuePos: pos, ValueEnd: end, Kind: token.STRING, Value: path},
                 Comment: comment,
         }
         p.imports = append(p.imports, spec)
diff --git a/src/go/parser/parser_test.go b/src/go/parser/parser_test.go

index 87b7d7bbab7b4fc02f70b160331c56db61441e07..811818923098398ba5b51309e7a51ee2108a0f58 100644 (file)
--- a/src/go/parser/parser_test.go
+++ b/src/go/parser/parser_test.go
@@ -946,3 +946,53 @@ func _() {}
                 t.Errorf("unexpected doc comment %v", docComment2)
         }
  }
+
+// Tests of BasicLit.End() method, which in go1.26 started precisely
+// recording the Value token's end position instead of heuristically
+// computing it, which is inaccurate for strings containing "\r".
+func TestBasicLit_End(t *testing.T) {
+       // stringlit is a raw string literal containing [a b c \r \n],
+       // denoting "abc\n", because the scanner discards \r in raw strings.
+       const stringlit = "`abc\r\n`"
+
+       // The semicolons exercise the case in which the next token
+       // (a SEMICOLON implied by a \n) isn't immediate but follows
+       // some horizontal space.
+       const src = `package p
+
+import ` + stringlit + ` ;
+
+type _ struct{ x int ` + stringlit + ` }
+
+const _ = ` + stringlit + ` ;
+`
+
+       fset := token.NewFileSet()
+       f, _ := ParseFile(fset, "", src, ParseComments|SkipObjectResolution)
+       tokFile := fset.File(f.Pos())
+
+       count := 0
+       ast.Inspect(f, func(n ast.Node) bool {
+               if lit, ok := n.(*ast.BasicLit); ok {
+                       count++
+                       var (
+                               start = tokFile.Offset(lit.Pos())
+                               end   = tokFile.Offset(lit.End())
+                       )
+
+                       // Check BasicLit.Value.
+                       if want := "`abc\n`"; lit.Value != want {
+                               t.Errorf("%s: BasicLit.Value = %q, want %q", fset.Position(lit.Pos()), lit.Value, want)
+                       }
+
+                       // Check source extent.
+                       if got := src[start:end]; got != stringlit {
+                               t.Errorf("%s: src[BasicLit.Pos:End] = %q, want %q", fset.Position(lit.Pos()), got, stringlit)
+                       }
+               }
+               return true
+       })
+       if count != 3 {
+               t.Errorf("found %d BasicLit, want 3", count)
+       }
+}
diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go

index cdbeb6323c6e80d63034b15819a6868c96f96ab6..07d987c88f9bab3bf9b3514d829345aff39402bf 100644 (file)
--- a/src/go/scanner/scanner.go
+++ b/src/go/scanner/scanner.go
@@ -10,6 +10,7 @@ package scanner
  import (
         "bytes"
         "fmt"
+       "go/internal/scannerhooks"
         "go/token"
         "path/filepath"
         "strconv"
@@ -41,11 +42,19 @@ type Scanner struct {
         lineOffset int       // current line offset
         insertSemi bool      // insert a semicolon before next newline
         nlPos      token.Pos // position of newline in preceding comment
+       stringEnd  token.Pos // end position; defined only for STRING tokens
  
         // public state - ok to modify
         ErrorCount int // number of errors encountered
  }
  
+// Provide go/parser with backdoor access to the StringEnd information.
+func init() {
+       scannerhooks.StringEnd = func(scanner any) token.Pos {
+               return scanner.(*Scanner).stringEnd
+       }
+}
+
  const (
         bom = 0xFEFF // byte order mark, only permitted as very first character
         eof = -1     // end of file
@@ -691,7 +700,7 @@ func stripCR(b []byte, comment bool) []byte {
         return c[:i]
  }
  
-func (s *Scanner) scanRawString() string {
+func (s *Scanner) scanRawString() (string, int) {
         // '`' opening already consumed
         offs := s.offset - 1
  
@@ -712,11 +721,12 @@ func (s *Scanner) scanRawString() string {
         }
  
         lit := s.src[offs:s.offset]
+       rawLen := len(lit)
         if hasCR {
                 lit = stripCR(lit, false)
         }
  
-       return string(lit)
+       return string(lit), rawLen
  }
  
  func (s *Scanner) skipWhitespace() {
@@ -850,6 +860,7 @@ scanAgain:
                         insertSemi = true
                         tok = token.STRING
                         lit = s.scanString()
+                       s.stringEnd = pos + token.Pos(len(lit))
                 case '\'':
                         insertSemi = true
                         tok = token.CHAR
@@ -857,7 +868,9 @@ scanAgain:
                 case '`':
                         insertSemi = true
                         tok = token.STRING
-                       lit = s.scanRawString()
+                       var rawLen int
+                       lit, rawLen = s.scanRawString()
+                       s.stringEnd = pos + token.Pos(rawLen)
                 case ':':
                         tok = s.switch2(token.COLON, token.DEFINE)
                 case '.':
author	Alan Donovan <adonovan@google.com>
	Wed, 12 Nov 2025 22:13:40 +0000 (17:13 -0500)
committer	Alan Donovan <adonovan@google.com>
	Mon, 17 Nov 2025 18:52:48 +0000 (10:52 -0800)
api/next/76031.txt	[new file with mode: 0644]	patch \| blob
doc/next/6-stdlib/99-minor/go/ast/76031.md	[new file with mode: 0644]	patch \| blob
src/go/ast/ast.go		patch \| blob \| history
src/go/ast/commentmap_test.go		patch \| blob \| history
src/go/ast/example_test.go		patch \| blob \| history
src/go/build/deps_test.go		patch \| blob \| history
src/go/internal/scannerhooks/hooks.go	[new file with mode: 0644]	patch \| blob
src/go/parser/parser.go		patch \| blob \| history
src/go/parser/parser_test.go		patch \| blob \| history
src/go/scanner/scanner.go		patch \| blob \| history