}{
{[]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
{[]byte(`-lextra\ fun\ arg\\`), []string{`-lextra fun arg\`}},
- {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace"}},
- {[]byte(" \r\n "), nil},
+ {[]byte("\textra whitespace\r\n"), []string{"extra", "whitespace\r"}},
+ {[]byte(" \r\n "), []string{"\r"}},
{[]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
{[]byte(`"-lextra fun arg\\"`), []string{`-lextra fun arg\`}},
{[]byte(`" \r\n\ "`), []string{` \r\n\ `}},
- {[]byte(`""`), nil},
+ {[]byte(`""`), []string{""}},
{[]byte(``), nil},
{[]byte(`"\\"`), []string{`\`}},
{[]byte(`"\x"`), []string{`\x`}},
{[]byte(`"\\x"`), []string{`\x`}},
- {[]byte(`'\\'`), []string{`\`}},
+ {[]byte(`'\\'`), []string{`\\`}},
{[]byte(`'\x'`), []string{`\x`}},
{[]byte(`"\\x"`), []string{`\x`}},
+ {[]byte("\\\n"), nil},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`), []string{"-fPIC", "-I/test/include/foo", "-DQUOTED=/test/share/doc"}},
{[]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
} {
got, err := splitPkgConfigOutput(test.in)
if err != nil {
- t.Errorf("splitPkgConfigOutput on %v failed with error %v", test.in, err)
+ t.Errorf("splitPkgConfigOutput on %#q failed with error %v", test.in, err)
continue
}
if !reflect.DeepEqual(got, test.want) {
- t.Errorf("splitPkgConfigOutput(%v) = %v; want %v", test.in, got, test.want)
+ t.Errorf("splitPkgConfigOutput(%#q) = %#q; want %#q", test.in, got, test.want)
}
}
return envList("PKG_CONFIG", cfg.DefaultPkgConfig)[0]
}
-// splitPkgConfigOutput parses the pkg-config output into a slice of
-// flags. This implements the algorithm from pkgconf/libpkgconf/argvsplit.c.
+// splitPkgConfigOutput parses the pkg-config output into a slice of flags.
+// This implements the shell quoting semantics described in
+// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02,
+// except that it does not support parameter or arithmetic expansion or command
+// substitution and hard-codes the <blank> delimiters instead of reading them
+// from LC_LOCALE.
func splitPkgConfigOutput(out []byte) ([]string, error) {
if len(out) == 0 {
return nil, nil
}
var flags []string
flag := make([]byte, 0, len(out))
- escaped := false
- quote := byte(0)
+ didQuote := false // was the current flag parsed from a quoted string?
+ escaped := false // did we just read `\` in a non-single-quoted context?
+ quote := byte(0) // what is the quote character around the current string?
for _, c := range out {
if escaped {
- if quote != 0 {
+ if quote == '"' {
+ // “The <backslash> shall retain its special meaning as an escape
+ // character … only when followed by one of the following characters
+ // when considered special:”
switch c {
- case '$', '`', '"', '\\':
+ case '$', '`', '"', '\\', '\n':
+ // Handle the escaped character normally.
default:
- flag = append(flag, '\\')
+ // Not an escape character after all.
+ flag = append(flag, '\\', c)
+ escaped = false
+ continue
}
- flag = append(flag, c)
+ }
+
+ if c == '\n' {
+ // “If a <newline> follows the <backslash>, the shell shall interpret
+ // this as line continuation.”
} else {
flag = append(flag, c)
}
escaped = false
- } else if quote != 0 {
- if c == quote {
- quote = 0
- } else {
- switch c {
- case '\\':
- escaped = true
- default:
- flag = append(flag, c)
- }
- }
- } else if strings.IndexByte(" \t\n\v\f\r", c) < 0 {
+ continue
+ }
+
+ if quote != 0 && c == quote {
+ quote = 0
+ continue
+ }
+ switch quote {
+ case '\'':
+ // “preserve the literal value of each character”
+ flag = append(flag, c)
+ continue
+ case '"':
+ // “preserve the literal value of all characters within the double-quotes,
+ // with the exception of …”
switch c {
- case '\\':
- escaped = true
- case '\'', '"':
- quote = c
+ case '`', '$', '\\':
default:
flag = append(flag, c)
+ continue
+ }
+ }
+
+ // “The application shall quote the following characters if they are to
+ // represent themselves:”
+ switch c {
+ case '|', '&', ';', '<', '>', '(', ')', '$', '`':
+ return nil, fmt.Errorf("unexpected shell character %q in pkgconf output", c)
+
+ case '\\':
+ // “A <backslash> that is not quoted shall preserve the literal value of
+ // the following character, with the exception of a <newline>.”
+ escaped = true
+ continue
+
+ case '"', '\'':
+ quote = c
+ didQuote = true
+ continue
+
+ case ' ', '\t', '\n':
+ if len(flag) > 0 || didQuote {
+ flags = append(flags, string(flag))
}
- } else if len(flag) != 0 {
- flags = append(flags, string(flag))
- flag = flag[:0]
+ flag, didQuote = flag[:0], false
+ continue
}
+
+ flag = append(flag, c)
+ }
+
+ // Prefer to report a missing quote instead of a missing escape. If the string
+ // is something like `"foo\`, it's ambiguous as to whether the trailing
+ // backslash is really an escape at all.
+ if quote != 0 {
+ return nil, errors.New("unterminated quoted string in pkgconf output")
}
if escaped {
- return nil, errors.New("broken character escaping in pkgconf output ")
+ return nil, errors.New("broken character escaping in pkgconf output")
}
- if quote != 0 {
- return nil, errors.New("unterminated quoted string in pkgconf output ")
- } else if len(flag) != 0 {
+
+ if len(flag) > 0 || didQuote {
flags = append(flags, string(flag))
}
-
return flags, nil
}
return nil, nil, err
}
if len(out) > 0 {
- cflags, err = splitPkgConfigOutput(out)
+ cflags, err = splitPkgConfigOutput(bytes.TrimSpace(out))
if err != nil {
return nil, nil, err
}
--- /dev/null
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package work
+
+import (
+ "bytes"
+ "internal/testenv"
+ "strings"
+ "testing"
+ "unicode"
+)
+
+func FuzzSplitPkgConfigOutput(f *testing.F) {
+ testenv.MustHaveExecPath(f, "/bin/sh")
+
+ f.Add([]byte(`$FOO`))
+ f.Add([]byte(`\$FOO`))
+ f.Add([]byte(`${FOO}`))
+ f.Add([]byte(`\${FOO}`))
+ f.Add([]byte(`$(/bin/false)`))
+ f.Add([]byte(`\$(/bin/false)`))
+ f.Add([]byte(`$((0))`))
+ f.Add([]byte(`\$((0))`))
+ f.Add([]byte(`unescaped space`))
+ f.Add([]byte(`escaped\ space`))
+ f.Add([]byte(`"unterminated quote`))
+ f.Add([]byte(`'unterminated quote`))
+ f.Add([]byte(`unterminated escape\`))
+ f.Add([]byte(`"quote with unterminated escape\`))
+ f.Add([]byte(`'quoted "double quotes"'`))
+ f.Add([]byte(`"quoted 'single quotes'"`))
+ f.Add([]byte(`"\$0"`))
+ f.Add([]byte(`"\$\0"`))
+ f.Add([]byte(`"\$"`))
+ f.Add([]byte(`"\$ "`))
+
+ // Example positive inputs from TestSplitPkgConfigOutput.
+ // Some bare newlines have been removed so that the inputs
+ // are valid in the shell script we use for comparison.
+ f.Add([]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`))
+ f.Add([]byte(`-lextra\ fun\ arg\\`))
+ f.Add([]byte("\textra whitespace\r"))
+ f.Add([]byte(" \r "))
+ f.Add([]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`))
+ f.Add([]byte(`"-lextra fun arg\\"`))
+ f.Add([]byte(`" \r\n\ "`))
+ f.Add([]byte(`""`))
+ f.Add([]byte(``))
+ f.Add([]byte(`"\\"`))
+ f.Add([]byte(`"\x"`))
+ f.Add([]byte(`"\\x"`))
+ f.Add([]byte(`'\\'`))
+ f.Add([]byte(`'\x'`))
+ f.Add([]byte(`"\\x"`))
+ f.Add([]byte("\\\n"))
+ f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`))
+ f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`))
+ f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`))
+ f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='/test/share/doc'`))
+ f.Add([]byte(`-DQUOTED='/te\st/share/d\oc'`))
+ f.Add([]byte(`-Dhello=10 -Dworld=+32 -DDEFINED_FROM_PKG_CONFIG=hello\ world`))
+ f.Add([]byte(`"broken\"" \\\a "a"`))
+
+ // Example negative inputs from TestSplitPkgConfigOutput.
+ f.Add([]byte(`" \r\n `))
+ f.Add([]byte(`"-r:foo" "-L/usr/white space/lib "-lfoo bar" "-lbar baz"`))
+ f.Add([]byte(`"-lextra fun arg\\`))
+ f.Add([]byte(`broken flag\`))
+ f.Add([]byte(`extra broken flag \`))
+ f.Add([]byte(`\`))
+ f.Add([]byte(`"broken\"" "extra" \`))
+
+ f.Fuzz(func(t *testing.T, b []byte) {
+ t.Parallel()
+
+ if bytes.ContainsAny(b, "*?[#~%\x00{}!") {
+ t.Skipf("skipping %#q: contains a sometimes-quoted character", b)
+ }
+ // splitPkgConfigOutput itself rejects inputs that contain unquoted
+ // shell operator characters. (Quoted shell characters are fine.)
+
+ for _, c := range b {
+ if c > unicode.MaxASCII {
+ t.Skipf("skipping %#q: contains a non-ASCII character %q", b, c)
+ }
+ if !unicode.IsGraphic(rune(c)) && !unicode.IsSpace(rune(c)) {
+ t.Skipf("skipping %#q: contains non-graphic character %q", b, c)
+ }
+ }
+
+ args, err := splitPkgConfigOutput(b)
+ if err != nil {
+ // We haven't checked that the shell would actually reject this input too,
+ // but if splitPkgConfigOutput rejected it it's probably too dangerous to
+ // run in the script.
+ t.Logf("%#q: %v", b, err)
+ return
+ }
+ t.Logf("splitPkgConfigOutput(%#q) = %#q", b, args)
+ if len(args) == 0 {
+ t.Skipf("skipping %#q: contains no arguments", b)
+ }
+
+ var buf strings.Builder
+ for _, arg := range args {
+ buf.WriteString(arg)
+ buf.WriteString("\n")
+ }
+ wantOut := buf.String()
+
+ if strings.Count(wantOut, "\n") != len(args)+bytes.Count(b, []byte("\n")) {
+ // One of the newlines in b was treated as a delimiter and not part of an
+ // argument. Our bash test script would interpret that as a syntax error.
+ t.Skipf("skipping %#q: contains a bare newline", b)
+ }
+
+ // We use the printf shell command to echo the arguments because, per
+ // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/echo.html#tag_20_37_16:
+ // “It is not possible to use echo portably across all POSIX systems unless
+ // both -n (as the first argument) and escape sequences are omitted.”
+ cmd := testenv.Command(t, "/bin/sh", "-c", "printf '%s\n' "+string(b))
+ cmd.Env = append(cmd.Environ(), "LC_ALL=POSIX", "POSIXLY_CORRECT=1")
+ cmd.Stderr = new(strings.Builder)
+ out, err := cmd.Output()
+ if err != nil {
+ t.Fatalf("%#q: %v\n%s", cmd.Args, err, cmd.Stderr)
+ }
+
+ if string(out) != wantOut {
+ t.Logf("%#q:\n%#q", cmd.Args, out)
+ t.Logf("want:\n%#q", wantOut)
+ t.Errorf("parsed args do not match")
+ }
+ })
+}