upperhex = "0123456789ABCDEF"
)
+// contains reports whether the string contains the byte c.
+func contains(s string, c byte) bool {
+ return index(s, c) != -1
+}
+
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
}
return
}
+// QuotedPrefix returns the quoted string (as understood by Unquote) at the prefix of s.
+// If s does not start with a valid quoted string, QuotedPrefix returns an error.
+func QuotedPrefix(s string) (string, error) {
+ out, _, err := unquote(s, false)
+ return out, err
+}
+
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (string, error) {
- n := len(s)
- if n < 2 {
+ out, rem, err := unquote(s, true)
+ if len(rem) > 0 {
return "", ErrSyntax
}
- quote := s[0]
- if quote != s[n-1] {
- return "", ErrSyntax
+ return out, err
+}
+
+// unquote parses a quoted string at the start of the input,
+// returning the parsed prefix, the remaining suffix, and any parse errors.
+// If unescape is true, the parsed prefix is unescaped,
+// otherwise the input prefix is provided verbatim.
+func unquote(in string, unescape bool) (out, rem string, err error) {
+ // Determine the quote form and optimistically find the terminating quote.
+ if len(in) < 2 {
+ return "", in, ErrSyntax
+ }
+ quote := in[0]
+ end := index(in[1:], quote)
+ if end < 0 {
+ return "", in, ErrSyntax
}
- s = s[1 : n-1]
+ end += 2 // position after terminating quote; may be wrong if escape sequences are present
- if quote == '`' {
- if contains(s, '`') {
- return "", ErrSyntax
+ switch quote {
+ case '`':
+ switch {
+ case !unescape:
+ out = in[:end] // include quotes
+ case !contains(in[:end], '\r'):
+ out = in[len("`") : end-len("`")] // exclude quotes
+ default:
+ // Carriage return characters ('\r') inside raw string literals
+ // are discarded from the raw string value.
+ buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
+ for i := len("`"); i < end-len("`"); i++ {
+ if in[i] != '\r' {
+ buf = append(buf, in[i])
+ }
+ }
+ out = string(buf)
}
- if contains(s, '\r') {
- // -1 because we know there is at least one \r to remove.
- buf := make([]byte, 0, len(s)-1)
- for i := 0; i < len(s); i++ {
- if s[i] != '\r' {
- buf = append(buf, s[i])
+ // NOTE: Prior implementations did not verify that raw strings consist
+ // of valid UTF-8 characters and we continue to not verify it as such.
+ // The Go specification does not explicitly require valid UTF-8,
+ // but only mention that it is implicitly valid for Go source code
+ // (which must be valid UTF-8).
+ return out, in[end:], nil
+ case '"', '\'':
+ // Handle quoted strings without any escape sequences.
+ if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
+ var valid bool
+ switch quote {
+ case '"':
+ valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
+ case '\'':
+ r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
+ valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
+ }
+ if valid {
+ out = in[:end]
+ if unescape {
+ out = out[1 : end-1] // exclude quotes
}
+ return out, in[end:], nil
}
- return string(buf), nil
}
- return s, nil
- }
- if quote != '"' && quote != '\'' {
- return "", ErrSyntax
- }
- if contains(s, '\n') {
- return "", ErrSyntax
- }
- // Is it trivial? Avoid allocation.
- if !contains(s, '\\') && !contains(s, quote) {
- switch quote {
- case '"':
- if utf8.ValidString(s) {
- return s, nil
+ // Handle quoted strings with escape sequences.
+ var buf []byte
+ in0 := in
+ in = in[1:] // skip starting quote
+ if unescape {
+ buf = make([]byte, 0, 3*end/2) // try to avoid more allocations
+ }
+ for len(in) > 0 && in[0] != quote {
+ // Process the next character,
+ // rejecting any unescaped newline characters which are invalid.
+ r, multibyte, rem, err := UnquoteChar(in, quote)
+ if in[0] == '\n' || err != nil {
+ return "", in0, ErrSyntax
}
- case '\'':
- r, size := utf8.DecodeRuneInString(s)
- if size == len(s) && (r != utf8.RuneError || size != 1) {
- return s, nil
+ in = rem
+
+ // Append the character if unescaping the input.
+ if unescape {
+ if r < utf8.RuneSelf || !multibyte {
+ buf = append(buf, byte(r))
+ } else {
+ var arr [utf8.UTFMax]byte
+ n := utf8.EncodeRune(arr[:], r)
+ buf = append(buf, arr[:n]...)
+ }
}
- }
- }
- var runeTmp [utf8.UTFMax]byte
- buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
- for len(s) > 0 {
- c, multibyte, ss, err := UnquoteChar(s, quote)
- if err != nil {
- return "", err
+ // Single quoted strings must be a single character.
+ if quote == '\'' {
+ break
+ }
}
- s = ss
- if c < utf8.RuneSelf || !multibyte {
- buf = append(buf, byte(c))
- } else {
- n := utf8.EncodeRune(runeTmp[:], c)
- buf = append(buf, runeTmp[:n]...)
+
+ // Verify that the string ends with a terminating quote.
+ if !(len(in) > 0 && in[0] == quote) {
+ return "", in0, ErrSyntax
}
- if quote == '\'' && len(s) != 0 {
- // single-quoted must be single character
- return "", ErrSyntax
+ in = in[1:] // skip terminating quote
+
+ if unescape {
+ return string(buf), in, nil
}
+ return in0[:len(in0)-len(in)], in, nil
+ default:
+ return "", in, ErrSyntax
}
- return string(buf), nil
}
// bsearch16 returns the smallest i such that a[i] >= x.
import (
. "strconv"
+ "strings"
"testing"
"unicode"
)
`"\z"`,
"`",
"`xxx",
+ "``x\r",
"`\"",
`"\'"`,
`'\"'`,
func TestUnquote(t *testing.T) {
for _, tt := range unquotetests {
- if out, err := Unquote(tt.in); err != nil || out != tt.out {
- t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out)
- }
+ testUnquote(t, tt.in, tt.out, nil)
}
-
- // run the quote tests too, backward
for _, tt := range quotetests {
- if in, err := Unquote(tt.out); in != tt.in {
- t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in)
- }
+ testUnquote(t, tt.out, tt.in, nil)
}
-
for _, s := range misquoted {
- if out, err := Unquote(s); out != "" || err != ErrSyntax {
- t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax)
- }
+ testUnquote(t, s, "", ErrSyntax)
}
}
// one of:
want string
- wantErr string
+ wantErr error
}{
{in: `"foo"`, want: "foo"},
- {in: `"foo`, wantErr: "invalid syntax"},
+ {in: `"foo`, wantErr: ErrSyntax},
{in: `"` + "\xc0" + `"`, want: "\xef\xbf\xbd"},
{in: `"a` + "\xc0" + `"`, want: "a\xef\xbf\xbd"},
{in: `"\t` + "\xc0" + `"`, want: "\t\xef\xbf\xbd"},
}
- for i, tt := range tests {
- got, err := Unquote(tt.in)
- var gotErr string
- if err != nil {
- gotErr = err.Error()
- }
- if gotErr != tt.wantErr {
- t.Errorf("%d. Unquote(%q) = err %v; want %q", i, tt.in, err, tt.wantErr)
- }
- if tt.wantErr == "" && err == nil && got != tt.want {
- t.Errorf("%d. Unquote(%q) = %02x; want %02x", i, tt.in, []byte(got), []byte(tt.want))
- }
+ for _, tt := range tests {
+ testUnquote(t, tt.in, tt.want, tt.wantErr)
+ }
+}
+
+func testUnquote(t *testing.T, in, want string, wantErr error) {
+ // Test Unquote.
+ got, gotErr := Unquote(in)
+ if got != want || gotErr != wantErr {
+ t.Errorf("Unquote(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
+ }
+
+ // Test QuotedPrefix.
+ // Adding an arbitrary suffix should not change the result of QuotedPrefix
+ // assume that the suffix doesn't accidentally terminate a truncated input.
+ if gotErr == nil {
+ want = in
+ }
+ suffix := "\n\r\\\"`'" // special characters for quoted strings
+ if len(in) > 0 {
+ suffix = strings.ReplaceAll(suffix, in[:1], "")
+ }
+ in += suffix
+ got, gotErr = QuotedPrefix(in)
+ if gotErr == nil && wantErr != nil {
+ _, wantErr = Unquote(got) // original input had trailing junk, reparse with only valid prefix
+ want = got
+ }
+ if got != want || gotErr != wantErr {
+ t.Errorf("QuotedPrefix(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
}
}