From 398bf9d5a09fdcc14088dc42f953d6e3142fd343 Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 9 Mar 2015 17:40:23 -0700 Subject: [PATCH] cmd/internal/gc, cmd/yacc: restore custom syntax error messages This restores go.errors from before 3af0d79 along with a fixed up version of the bisonerrors AWK script, translated to Go. However, this means Yyerror needs access to the yacc parser's state, which is currently private. To workaround that, add a "state" accessor method like the Lookahead method added in c7fa3c6. Update issue #9968. Change-Id: Ib868789e92fdb7d135442120a392457923e50121 Reviewed-on: https://go-review.googlesource.com/7270 Reviewed-by: Russ Cox Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot --- src/cmd/internal/gc/go.errors | 81 +++++++++++++ src/cmd/internal/gc/lex.go | 6 +- src/cmd/internal/gc/subr.go | 13 +- src/cmd/internal/gc/y.go | 4 + src/cmd/internal/gc/yaccerrors.go | 194 ++++++++++++++++++++++++++++++ src/cmd/internal/gc/yymsg.go | 83 +++++++++++++ src/cmd/yacc/yacc.go | 4 + test/fixedbugs/bug121.go | 4 - test/fixedbugs/issue4468.go | 3 - 9 files changed, 373 insertions(+), 19 deletions(-) create mode 100644 src/cmd/internal/gc/go.errors create mode 100644 src/cmd/internal/gc/yaccerrors.go create mode 100644 src/cmd/internal/gc/yymsg.go diff --git a/src/cmd/internal/gc/go.errors b/src/cmd/internal/gc/go.errors new file mode 100644 index 0000000000..8370a2007d --- /dev/null +++ b/src/cmd/internal/gc/go.errors @@ -0,0 +1,81 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Example-based syntax error messages. +// See yaccerrors.go. + +package gc + +var yymsg = []struct { + yystate int + yychar int + msg string +}{ + // Each line of the form % token list + // is converted by yaccerrors.go into the yystate and yychar caused + // by that token list. + + % loadsys package LIMPORT '(' LLITERAL import_package import_there ',' + "unexpected comma during import block"}, + + % loadsys package LIMPORT LNAME ';' + "missing import path; require quoted string"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header ';' + "missing { after if clause"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LSWITCH if_header ';' + "missing { after switch clause"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LFOR for_header ';' + "missing { after for clause"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LFOR ';' LBODY + "missing { after for clause"}, + + % loadsys package imports LFUNC LNAME '(' ')' ';' '{' + "unexpected semicolon or newline before {"}, + + % loadsys package imports LTYPE LNAME ';' + "unexpected semicolon or newline in type declaration"}, + + % loadsys package imports LCHAN '}' + "unexpected } in channel type"}, + + % loadsys package imports LCHAN ')' + "unexpected ) in channel type"}, + + % loadsys package imports LCHAN ',' + "unexpected comma in channel type"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' if_stmt ';' LELSE + "unexpected semicolon or newline before else"}, + + % loadsys package imports LTYPE LNAME LINTERFACE '{' LNAME ',' LNAME + "name list not allowed in interface type"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LFOR LVAR LNAME '=' LNAME + "var declaration not allowed in for initializer"}, + + % loadsys package imports LVAR LNAME '[' ']' LNAME '{' + "unexpected { at end of statement"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LVAR LNAME '[' ']' LNAME '{' + "unexpected { at end of statement"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LDEFER LNAME ';' + "argument to go/defer must be function call"}, + + % loadsys package imports LVAR LNAME '=' LNAME '{' LNAME ';' + "need trailing comma before newline in composite literal"}, + + % loadsys package imports LVAR LNAME '=' comptype '{' LNAME ';' + "need trailing comma before newline in composite literal"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LFUNC LNAME + "nested func not allowed"}, + + % loadsys package imports LFUNC LNAME '(' ')' '{' LIF if_header loop_body LELSE ';' + "else must be followed by if or statement block"}, +} diff --git a/src/cmd/internal/gc/lex.go b/src/cmd/internal/gc/lex.go index 323b5c5d47..bee288ff2b 100644 --- a/src/cmd/internal/gc/lex.go +++ b/src/cmd/internal/gc/lex.go @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. //go:generate go tool yacc go.y +//go:generate go run yaccerrors.go //go:generate go run mkbuiltin.go runtime unsafe package gc @@ -1782,11 +1783,6 @@ func pragcgo(text string) { type yy struct{} -var yymsg []struct { - yystate, yychar int - msg string -} - func (yy) Lex(v *yySymType) int { return int(yylex(v)) } diff --git a/src/cmd/internal/gc/subr.go b/src/cmd/internal/gc/subr.go index 5508e07c9e..52bb201a82 100644 --- a/src/cmd/internal/gc/subr.go +++ b/src/cmd/internal/gc/subr.go @@ -122,10 +122,6 @@ func yyerrorl(line int, fmt_ string, args ...interface{}) { } } -var yystate int - -var yychar_subr int - var yyerror_lastsyntax int func Yyerror(fmt_ string, args ...interface{}) { @@ -139,8 +135,11 @@ func Yyerror(fmt_ string, args ...interface{}) { if strings.HasPrefix(fmt_, "syntax error") { nsyntaxerrors++ + yystate := theparser.(*yyParserImpl).state() + yychar := theparser.Lookahead() + if Debug['x'] != 0 { - fmt.Printf("yyerror: yystate=%d yychar=%d\n", yystate, yychar_subr) + fmt.Printf("yyerror: yystate=%d yychar=%d\n", yystate, yychar) } // An unexpected EOF caused a syntax error. Use the previous @@ -166,8 +165,8 @@ func Yyerror(fmt_ string, args ...interface{}) { } // look for parse state-specific errors in list (see go.errors). - for i := 0; i < len(yymsg); i++ { - if yymsg[i].yystate == yystate && yymsg[i].yychar == yychar_subr { + for i := range yymsg { + if yymsg[i].yystate == yystate && yymsg[i].yychar == yychar { yyerrorl(int(lexlineno), "syntax error: %s", yymsg[i].msg) return } diff --git a/src/cmd/internal/gc/y.go b/src/cmd/internal/gc/y.go index f63bc01bc0..c991829864 100644 --- a/src/cmd/internal/gc/y.go +++ b/src/cmd/internal/gc/y.go @@ -857,6 +857,7 @@ type yyParser interface { type yyParserImpl struct { lookahead func() int + state func() int } func (p *yyParserImpl) Lookahead() int { @@ -866,6 +867,7 @@ func (p *yyParserImpl) Lookahead() int { func yyNewParser() yyParser { p := &yyParserImpl{ lookahead: func() int { return -1 }, + state: func() int { return -1 }, } return p } @@ -942,9 +944,11 @@ func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { yystate := 0 yychar := -1 yytoken := -1 // yychar translated into internal numbering + yyrcvr.state = func() int { return yystate } yyrcvr.lookahead = func() int { return yychar } defer func() { // Make sure we report no lookahead when not parsing. + yystate = -1 yychar = -1 yytoken = -1 }() diff --git a/src/cmd/internal/gc/yaccerrors.go b/src/cmd/internal/gc/yaccerrors.go new file mode 100644 index 0000000000..9dc54d9c8c --- /dev/null +++ b/src/cmd/internal/gc/yaccerrors.go @@ -0,0 +1,194 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// This program implements the core idea from +// +// Clinton L. Jeffery, Generating LR syntax error messages from examples, +// ACM TOPLAS 25(5) (September 2003). http://doi.acm.org/10.1145/937563.937566 +// +// It reads Bison's summary of a grammar followed by a file +// like go.errors, replacing lines beginning with % by the +// yystate and yychar that will be active when an error happens +// while parsing that line. +// +// Unlike the system described in the paper, the lines in go.errors +// give grammar symbol name lists, not actual program fragments. +// This is a little less programmer-friendly but doesn't require being +// able to run the text through lex.c. + +package main + +import ( + "bufio" + "fmt" + "io" + "log" + "os" + "strconv" + "strings" +) + +func xatoi(s string) int { + n, err := strconv.Atoi(s) + if err != nil { + log.Fatal(err) + } + return n +} + +func trimParen(s string) string { + s = strings.TrimPrefix(s, "(") + s = strings.TrimSuffix(s, ")") + return s +} + +type action struct { + token string + n int +} + +var shift = map[int][]action{} +var reduce = map[int][]action{} + +type rule struct { + lhs string + size int +} + +var rules = map[int]rule{} + +func readYaccOutput() { + r, err := os.Open("y.output") + if err != nil { + log.Fatal(err) + } + defer r.Close() + + var state int + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + f := strings.Fields(scanner.Text()) + nf := len(f) + + if nf >= 4 && f[1] == "terminals," && f[3] == "nonterminals" { + // We're done. + break + } + + if nf >= 2 && f[0] == "state" { + state = xatoi(f[1]) + continue + } + if nf >= 3 && (f[1] == "shift" || f[1] == "goto") { + shift[state] = append(shift[state], action{f[0], xatoi(f[2])}) + continue + } + if nf >= 3 && f[1] == "reduce" { + reduce[state] = append(reduce[state], action{f[0], xatoi(f[2])}) + continue + } + if nf >= 3 && strings.HasSuffix(f[0], ":") && strings.HasPrefix(f[nf-1], "(") && strings.HasSuffix(f[nf-1], ")") { + n := xatoi(trimParen(f[nf-1])) + + size := nf - 2 + if size == 1 && f[1] == "." { + size = 0 + } + + rules[n] = rule{strings.TrimSuffix(f[0], ":"), size} + continue + } + } +} + +func runMachine(w io.Writer, s string) { + f := strings.Fields(s) + + // Run it through the LR machine and print the induced "yystate, yychar," + // at the point where the error happens. + + var stack []int + state := 0 + i := 1 + tok := "" + +Loop: + if tok == "" && i < len(f) { + tok = f[i] + i++ + } + + for _, a := range shift[state] { + if a.token == tok { + if false { + fmt.Println("SHIFT ", tok, " ", state, " -> ", a) + } + stack = append(stack, state) + state = a.n + tok = "" + goto Loop + } + } + + for _, a := range reduce[state] { + if a.token == tok || a.token == "." { + stack = append(stack, state) + rule, ok := rules[a.n] + if !ok { + log.Fatal("missing rule") + } + stack = stack[:len(stack)-rule.size] + state = stack[len(stack)-1] + stack = stack[:len(stack)-1] + if tok != "" { + i-- + } + tok = rule.lhs + if false { + fmt.Println("REDUCE ", stack, " ", state, " ", tok, " rule ", rule) + } + goto Loop + } + } + + // No shift or reduce applied - found the error. + fmt.Fprintf(w, "\t{%d, %s,\n", state, tok) +} + +func processGoErrors() { + r, err := os.Open("go.errors") + if err != nil { + log.Fatal(err) + } + defer r.Close() + + w, err := os.Create("yymsg.go") + if err != nil { + log.Fatal(err) + } + defer w.Close() + + fmt.Fprintf(w, "// DO NOT EDIT - generated with go generate\n\n") + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + s := scanner.Text() + + // Treat % as first field on line as introducing a pattern (token sequence). + if strings.HasPrefix(strings.TrimSpace(s), "%") { + runMachine(w, s) + continue + } + + fmt.Fprintln(w, s) + } +} + +func main() { + readYaccOutput() + processGoErrors() +} diff --git a/src/cmd/internal/gc/yymsg.go b/src/cmd/internal/gc/yymsg.go new file mode 100644 index 0000000000..b1309595f7 --- /dev/null +++ b/src/cmd/internal/gc/yymsg.go @@ -0,0 +1,83 @@ +// DO NOT EDIT - generated with go generate + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Example-based syntax error messages. +// See yaccerrors.go. + +package gc + +var yymsg = []struct { + yystate int + yychar int + msg string +}{ + // Each line of the form % token list + // is converted by bisonerrors into the yystate and yychar caused + // by that token list. + + {332, ',', + "unexpected comma during import block"}, + + {89, ';', + "missing import path; require quoted string"}, + + {390, ';', + "missing { after if clause"}, + + {387, ';', + "missing { after switch clause"}, + + {279, ';', + "missing { after for clause"}, + + {498, LBODY, + "missing { after for clause"}, + + {17, '{', + "unexpected semicolon or newline before {"}, + + {111, ';', + "unexpected semicolon or newline in type declaration"}, + + {78, '}', + "unexpected } in channel type"}, + + {78, ')', + "unexpected ) in channel type"}, + + {78, ',', + "unexpected comma in channel type"}, + + {416, LELSE, + "unexpected semicolon or newline before else"}, + + {329, ',', + "name list not allowed in interface type"}, + + {279, LVAR, + "var declaration not allowed in for initializer"}, + + {25, '{', + "unexpected { at end of statement"}, + + {371, '{', + "unexpected { at end of statement"}, + + {122, ';', + "argument to go/defer must be function call"}, + + {398, ';', + "need trailing comma before newline in composite literal"}, + + {414, ';', + "need trailing comma before newline in composite literal"}, + + {124, LNAME, + "nested func not allowed"}, + + {650, ';', + "else must be followed by if or statement block"}, +} diff --git a/src/cmd/yacc/yacc.go b/src/cmd/yacc/yacc.go index c6d79e5143..bb13b6d719 100644 --- a/src/cmd/yacc/yacc.go +++ b/src/cmd/yacc/yacc.go @@ -3212,6 +3212,7 @@ type $$Parser interface { type $$ParserImpl struct { lookahead func() int + state func() int } func (p *$$ParserImpl) Lookahead() int { @@ -3221,6 +3222,7 @@ func (p *$$ParserImpl) Lookahead() int { func $$NewParser() $$Parser { p := &$$ParserImpl{ lookahead: func() int { return -1 }, + state: func() int { return -1 }, } return p } @@ -3297,9 +3299,11 @@ func ($$rcvr *$$ParserImpl) Parse($$lex $$Lexer) int { $$state := 0 $$char := -1 $$token := -1 // $$char translated into internal numbering + $$rcvr.state = func() int { return $$state } $$rcvr.lookahead = func() int { return $$char } defer func() { // Make sure we report no lookahead when not parsing. + $$state = -1 $$char = -1 $$token = -1 }() diff --git a/test/fixedbugs/bug121.go b/test/fixedbugs/bug121.go index 34924019b8..22c7181752 100644 --- a/test/fixedbugs/bug121.go +++ b/test/fixedbugs/bug121.go @@ -1,6 +1,3 @@ -// skip -// TODO(rsc): Reenable. See issue 9968. - // errorcheck // Copyright 2009 The Go Authors. All rights reserved. @@ -18,4 +15,3 @@ type I interface { type J interface { h T; // ERROR "syntax|signature" } - diff --git a/test/fixedbugs/issue4468.go b/test/fixedbugs/issue4468.go index 67b0a5d89b..ef0b46bcf6 100644 --- a/test/fixedbugs/issue4468.go +++ b/test/fixedbugs/issue4468.go @@ -1,6 +1,3 @@ -// skip -// TODO(rsc): Reenable. See issue 9968. - // errorcheck // Copyright 2012 The Go Authors. All rights reserved. -- 2.48.1