From faf564644d9948e001db7c4f61a0ac9a61283003 Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Tue, 25 Jul 2023 12:47:05 +0100 Subject: [PATCH] =?utf8?q?go/scanner:=20give=20specific=20error=20for=20cu?= =?utf8?q?rvy=20=E2=80=9Cabc=E2=80=9D=20quotes?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Code examples sometimes mistakenly use curvy quotes, leading to hard-to-spot invalid token errors. This change makes the error message explicit. (An alternative change would be to accept them in place of "abc" and emit an error, but the extra check would likely add an unacceptable dynamic cost to string scanning.) Fixes #61450 Change-Id: Ie2b18c958c6f8f71a56ac193a94a8d16eea839db Reviewed-on: https://go-review.googlesource.com/c/go/+/512855 Reviewed-by: Robert Griesemer Run-TryBot: Alan Donovan TryBot-Result: Gopher Robot --- src/go/scanner/scanner.go | 8 +++++++- src/go/scanner/scanner_test.go | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go index 75f835d310..8742c29906 100644 --- a/src/go/scanner/scanner.go +++ b/src/go/scanner/scanner.go @@ -943,7 +943,13 @@ scanAgain: default: // next reports unexpected BOMs - don't repeat if ch != bom { - s.errorf(s.file.Offset(pos), "illegal character %#U", ch) + // Report an informative error for U+201[CD] quotation + // marks, which are easily introduced via copy and paste. + if ch == '“' || ch == '”' { + s.errorf(s.file.Offset(pos), "curly quotation mark %q (use neutral %q)", ch, '"') + } else { + s.errorf(s.file.Offset(pos), "illegal character %#U", ch) + } } insertSemi = s.insertSemi // preserve insertSemi info tok = token.ILLEGAL diff --git a/src/go/scanner/scanner_test.go b/src/go/scanner/scanner_test.go index 9046148ac2..916a40a874 100644 --- a/src/go/scanner/scanner_test.go +++ b/src/go/scanner/scanner_test.go @@ -813,6 +813,7 @@ var errors = []struct { {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored {"abc\x00def", token.IDENT, 3, "abc", "illegal character NUL"}, {"abc\x00", token.IDENT, 3, "abc", "illegal character NUL"}, + {"“abc”", token.ILLEGAL, 0, "abc", `curly quotation mark '“' (use neutral '"')`}, } func TestScanErrors(t *testing.T) { -- 2.48.1