From: Mauri de Souza Meneguzzo Date: Sat, 29 Jul 2023 20:26:00 +0000 (+0000) Subject: regexp/syntax: accept (?<name>...) syntax as valid capture X-Git-Tag: go1.22rc1~1467 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ee61186b3301bb1c8610c0925fffd89e061909bb;p=gostls13.git regexp/syntax: accept (?<name>...) syntax as valid capture Currently the only named capture supported by regexp is (?P<name>a). The syntax (?<name>a) is also widely used and there is currently an effort from the Rust regex and RE2 teams to also accept this syntax. Fixes #58458 Change-Id: If22d44d3a5c4e8133ec68238ab130c151ca7c5c5 GitHub-Last-Rev: 31b50e6ab40cfb0f36df6f570525657d4680017f GitHub-Pull-Request: golang/go#61624 Reviewed-on: https://go-review.googlesource.com/c/go/+/513838 Auto-Submit: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: David Chase Run-TryBot: Ian Lance Taylor Reviewed-by: Ian Lance Taylor --- diff --git a/src/regexp/syntax/doc.go b/src/regexp/syntax/doc.go index f6a4b43f7a..eb8a971c73 100644 --- a/src/regexp/syntax/doc.go +++ b/src/regexp/syntax/doc.go @@ -56,6 +56,7 @@ Grouping: (re) numbered capturing group (submatch) (?P<name>re) named & numbered capturing group (submatch) + (?<name>re) named & numbered capturing group (submatch) (?:re) non-capturing group (?flags) set flags within current group; non-capturing (?flags:re) set flags during re; non-capturing diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go index accee9ab08..a4ccfe3bdb 100644 --- a/src/regexp/syntax/parse.go +++ b/src/regexp/syntax/parse.go @@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. 
- if len(t) > 4 && t[2] == 'P' && t[3] == '<' { + // Google source tree, (?P<expr>name) and (?<expr>name) are the + // dominant forms of named captures and both are supported. + startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<' + startsWithName := len(t) > 3 && t[2] == '<' + + if startsWithP || startsWithName { + // position of expr start + exprStartPos := 4 + if startsWithName { + exprStartPos = 3 + } + // Pull out name. end := strings.IndexRune(t, '>') if end < 0 { @@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { return "", &Error{ErrInvalidNamedCapture, s} } - capture := t[:end+1] // "(?P<name>" - name := t[4:end] // "name" + capture := t[:end+1] // "(?P<name>" or "(?<name>" + name := t[exprStartPos:end] // "name" if err = checkUTF8(name); err != nil { return "", err } diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go index 67e3c5622a..d7999046e0 100644 --- a/src/regexp/syntax/parse_test.go +++ b/src/regexp/syntax/parse_test.go @@ -160,6 +160,7 @@ var parseTests = []parseTest{ // Test named captures {`(?P<name>a)`, `cap{name:lit{a}}`}, + {`(?<name>a)`, `cap{name:lit{a}}`}, // Case-folded literals {`[Aa]`, `litfold{A}`}, @@ -482,6 +483,11 @@ var invalidRegexps = []string{ `(?P<name`, `(?P<x y>a)`, `(?P<>a)`, + `(?<name>a`, + `(?<name>`, + `(?<name`, + `(?<x y>a)`, + `(?<>a)`, `[a-Z]`, `(?i)[a-Z]`, `\Q\E*`,