]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/go: fuzz test splitPkgConfigOutput and fix minor bugs
authorBryan C. Mills <bcmills@google.com>
Thu, 9 Feb 2023 21:35:04 +0000 (16:35 -0500)
committerGopher Robot <gobot@golang.org>
Wed, 15 Feb 2023 15:30:05 +0000 (15:30 +0000)
In reviewing CL 466875, I noticed that the implementation of
splitPkgConfigOutput from CL 86541 referred to another specific
implementation, and that implementation has had recent changes to fix
deviations from the POSIX specification for shell argument parsing.

Curious about those changes, I decided to fuzz the function to check
whether it agreed in practice with the way a real shell parses
arguments in POSIX mode. It turned out to deviate in several edge
cases, such as backslash-escapes within single quotes, quoted empty
strings, and carriage returns. (We do not expect to see carriage
returns in pkg-config output anyway, but the quote handling might
matter.)

This change updates the implementation to refer to the POSIX
documentation instead of another implementation, and confirms the
behavior with a fuzz test. It may introduce minor deviations from the
pkgconf implementation that was previously used as a reference, but if
so it is plausible that those could be fixed upstream in pkgconf
(like the other recent changes there).

For #35262.
Updates ##23373.

Change-Id: Ifab76e94af0ca9a6d826379f4a6e2028561e615c
Reviewed-on: https://go-review.googlesource.com/c/go/+/466864
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Bryan Mills <bcmills@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>

src/cmd/go/internal/work/build_test.go
src/cmd/go/internal/work/exec.go
src/cmd/go/internal/work/shell_test.go [new file with mode: 0644]

index 9e32b39c39afddaa3e2a7c37673bddfeaabc0015..b9c16eea74b5f4a12ded9379c555092603a08ff8 100644 (file)
@@ -41,19 +41,20 @@ func TestSplitPkgConfigOutput(t *testing.T) {
        }{
                {[]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
                {[]byte(`-lextra\ fun\ arg\\`), []string{`-lextra fun arg\`}},
-               {[]byte("\textra     whitespace\r\n"), []string{"extra", "whitespace"}},
-               {[]byte("     \r\n      "), nil},
+               {[]byte("\textra     whitespace\r\n"), []string{"extra", "whitespace\r"}},
+               {[]byte("     \r\n      "), []string{"\r"}},
                {[]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`), []string{"-r:foo", "-L/usr/white space/lib", "-lfoo bar", "-lbar baz"}},
                {[]byte(`"-lextra fun arg\\"`), []string{`-lextra fun arg\`}},
                {[]byte(`"     \r\n\      "`), []string{`     \r\n\      `}},
-               {[]byte(`""`), nil},
+               {[]byte(`""`), []string{""}},
                {[]byte(``), nil},
                {[]byte(`"\\"`), []string{`\`}},
                {[]byte(`"\x"`), []string{`\x`}},
                {[]byte(`"\\x"`), []string{`\x`}},
-               {[]byte(`'\\'`), []string{`\`}},
+               {[]byte(`'\\'`), []string{`\\`}},
                {[]byte(`'\x'`), []string{`\x`}},
                {[]byte(`"\\x"`), []string{`\x`}},
+               {[]byte("\\\n"), nil},
                {[]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
                {[]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`), []string{"-fPIC", "-I/test/include/foo", "-DQUOTED=/test/share/doc"}},
                {[]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`), []string{"-fPIC", "-I/test/include/foo", `-DQUOTED="/test/share/doc"`}},
@@ -64,11 +65,11 @@ func TestSplitPkgConfigOutput(t *testing.T) {
        } {
                got, err := splitPkgConfigOutput(test.in)
                if err != nil {
-                       t.Errorf("splitPkgConfigOutput on %v failed with error %v", test.in, err)
+                       t.Errorf("splitPkgConfigOutput on %#q failed with error %v", test.in, err)
                        continue
                }
                if !reflect.DeepEqual(got, test.want) {
-                       t.Errorf("splitPkgConfigOutput(%v) = %v; want %v", test.in, got, test.want)
+                       t.Errorf("splitPkgConfigOutput(%#q) = %#q; want %#q", test.in, got, test.want)
                }
        }
 
index c1476f8757ade3781220332267dd3c9c45ee894b..b211680e1cc434c196ecf12e7d9d2ead021e8d2b 100644 (file)
@@ -1446,64 +1446,110 @@ func (b *Builder) PkgconfigCmd() string {
        return envList("PKG_CONFIG", cfg.DefaultPkgConfig)[0]
 }
 
-// splitPkgConfigOutput parses the pkg-config output into a slice of
-// flags. This implements the algorithm from pkgconf/libpkgconf/argvsplit.c.
+// splitPkgConfigOutput parses the pkg-config output into a slice of flags.
+// This implements the shell quoting semantics described in
+// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02,
+// except that it does not support parameter or arithmetic expansion or command
+// substitution and hard-codes the <blank> delimiters instead of reading them
+// from LC_LOCALE.
 func splitPkgConfigOutput(out []byte) ([]string, error) {
        if len(out) == 0 {
                return nil, nil
        }
        var flags []string
        flag := make([]byte, 0, len(out))
-       escaped := false
-       quote := byte(0)
+       didQuote := false // was the current flag parsed from a quoted string?
+       escaped := false  // did we just read `\` in a non-single-quoted context?
+       quote := byte(0)  // what is the quote character around the current string?
 
        for _, c := range out {
                if escaped {
-                       if quote != 0 {
+                       if quote == '"' {
+                               // “The <backslash> shall retain its special meaning as an escape
+                               // character … only when followed by one of the following characters
+                               // when considered special:”
                                switch c {
-                               case '$', '`', '"', '\\':
+                               case '$', '`', '"', '\\', '\n':
+                                       // Handle the escaped character normally.
                                default:
-                                       flag = append(flag, '\\')
+                                       // Not an escape character after all.
+                                       flag = append(flag, '\\', c)
+                                       escaped = false
+                                       continue
                                }
-                               flag = append(flag, c)
+                       }
+
+                       if c == '\n' {
+                               // “If a <newline> follows the <backslash>, the shell shall interpret
+                               // this as line continuation.”
                        } else {
                                flag = append(flag, c)
                        }
                        escaped = false
-               } else if quote != 0 {
-                       if c == quote {
-                               quote = 0
-                       } else {
-                               switch c {
-                               case '\\':
-                                       escaped = true
-                               default:
-                                       flag = append(flag, c)
-                               }
-                       }
-               } else if strings.IndexByte(" \t\n\v\f\r", c) < 0 {
+                       continue
+               }
+
+               if quote != 0 && c == quote {
+                       quote = 0
+                       continue
+               }
+               switch quote {
+               case '\'':
+                       // “preserve the literal value of each character”
+                       flag = append(flag, c)
+                       continue
+               case '"':
+                       // “preserve the literal value of all characters within the double-quotes,
+                       // with the exception of …”
                        switch c {
-                       case '\\':
-                               escaped = true
-                       case '\'', '"':
-                               quote = c
+                       case '`', '$', '\\':
                        default:
                                flag = append(flag, c)
+                               continue
+                       }
+               }
+
+               // “The application shall quote the following characters if they are to
+               // represent themselves:”
+               switch c {
+               case '|', '&', ';', '<', '>', '(', ')', '$', '`':
+                       return nil, fmt.Errorf("unexpected shell character %q in pkgconf output", c)
+
+               case '\\':
+                       // “A <backslash> that is not quoted shall preserve the literal value of
+                       // the following character, with the exception of a <newline>.”
+                       escaped = true
+                       continue
+
+               case '"', '\'':
+                       quote = c
+                       didQuote = true
+                       continue
+
+               case ' ', '\t', '\n':
+                       if len(flag) > 0 || didQuote {
+                               flags = append(flags, string(flag))
                        }
-               } else if len(flag) != 0 {
-                       flags = append(flags, string(flag))
-                       flag = flag[:0]
+                       flag, didQuote = flag[:0], false
+                       continue
                }
+
+               flag = append(flag, c)
+       }
+
+       // Prefer to report a missing quote instead of a missing escape. If the string
+       // is something like `"foo\`, it's ambiguous as to whether the trailing
+       // backslash is really an escape at all.
+       if quote != 0 {
+               return nil, errors.New("unterminated quoted string in pkgconf output")
        }
        if escaped {
-               return nil, errors.New("broken character escaping in pkgconf output ")
+               return nil, errors.New("broken character escaping in pkgconf output")
        }
-       if quote != 0 {
-               return nil, errors.New("unterminated quoted string in pkgconf output ")
-       } else if len(flag) != 0 {
+
+       if len(flag) > 0 || didQuote {
                flags = append(flags, string(flag))
        }
-
        return flags, nil
 }
 
@@ -1535,7 +1581,7 @@ func (b *Builder) getPkgConfigFlags(p *load.Package) (cflags, ldflags []string,
                        return nil, nil, err
                }
                if len(out) > 0 {
-                       cflags, err = splitPkgConfigOutput(out)
+                       cflags, err = splitPkgConfigOutput(bytes.TrimSpace(out))
                        if err != nil {
                                return nil, nil, err
                        }
diff --git a/src/cmd/go/internal/work/shell_test.go b/src/cmd/go/internal/work/shell_test.go
new file mode 100644 (file)
index 0000000..24bef4e
--- /dev/null
@@ -0,0 +1,139 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package work
+
+import (
+       "bytes"
+       "internal/testenv"
+       "strings"
+       "testing"
+       "unicode"
+)
+
+func FuzzSplitPkgConfigOutput(f *testing.F) {
+       testenv.MustHaveExecPath(f, "/bin/sh")
+
+       f.Add([]byte(`$FOO`))
+       f.Add([]byte(`\$FOO`))
+       f.Add([]byte(`${FOO}`))
+       f.Add([]byte(`\${FOO}`))
+       f.Add([]byte(`$(/bin/false)`))
+       f.Add([]byte(`\$(/bin/false)`))
+       f.Add([]byte(`$((0))`))
+       f.Add([]byte(`\$((0))`))
+       f.Add([]byte(`unescaped space`))
+       f.Add([]byte(`escaped\ space`))
+       f.Add([]byte(`"unterminated quote`))
+       f.Add([]byte(`'unterminated quote`))
+       f.Add([]byte(`unterminated escape\`))
+       f.Add([]byte(`"quote with unterminated escape\`))
+       f.Add([]byte(`'quoted "double quotes"'`))
+       f.Add([]byte(`"quoted 'single quotes'"`))
+       f.Add([]byte(`"\$0"`))
+       f.Add([]byte(`"\$\0"`))
+       f.Add([]byte(`"\$"`))
+       f.Add([]byte(`"\$ "`))
+
+       // Example positive inputs from TestSplitPkgConfigOutput.
+       // Some bare newlines have been removed so that the inputs
+       // are valid in the shell script we use for comparison.
+       f.Add([]byte(`-r:foo -L/usr/white\ space/lib -lfoo\ bar -lbar\ baz`))
+       f.Add([]byte(`-lextra\ fun\ arg\\`))
+       f.Add([]byte("\textra     whitespace\r"))
+       f.Add([]byte("     \r      "))
+       f.Add([]byte(`"-r:foo" "-L/usr/white space/lib" "-lfoo bar" "-lbar baz"`))
+       f.Add([]byte(`"-lextra fun arg\\"`))
+       f.Add([]byte(`"     \r\n\      "`))
+       f.Add([]byte(`""`))
+       f.Add([]byte(``))
+       f.Add([]byte(`"\\"`))
+       f.Add([]byte(`"\x"`))
+       f.Add([]byte(`"\\x"`))
+       f.Add([]byte(`'\\'`))
+       f.Add([]byte(`'\x'`))
+       f.Add([]byte(`"\\x"`))
+       f.Add([]byte("\\\n"))
+       f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='"/test/share/doc"'`))
+       f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED="/test/share/doc"`))
+       f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED=\"/test/share/doc\"`))
+       f.Add([]byte(`-fPIC -I/test/include/foo -DQUOTED='/test/share/doc'`))
+       f.Add([]byte(`-DQUOTED='/te\st/share/d\oc'`))
+       f.Add([]byte(`-Dhello=10 -Dworld=+32 -DDEFINED_FROM_PKG_CONFIG=hello\ world`))
+       f.Add([]byte(`"broken\"" \\\a "a"`))
+
+       // Example negative inputs from TestSplitPkgConfigOutput.
+       f.Add([]byte(`"     \r\n      `))
+       f.Add([]byte(`"-r:foo" "-L/usr/white space/lib "-lfoo bar" "-lbar baz"`))
+       f.Add([]byte(`"-lextra fun arg\\`))
+       f.Add([]byte(`broken flag\`))
+       f.Add([]byte(`extra broken flag \`))
+       f.Add([]byte(`\`))
+       f.Add([]byte(`"broken\"" "extra" \`))
+
+       f.Fuzz(func(t *testing.T, b []byte) {
+               t.Parallel()
+
+               if bytes.ContainsAny(b, "*?[#~%\x00{}!") {
+                       t.Skipf("skipping %#q: contains a sometimes-quoted character", b)
+               }
+               // splitPkgConfigOutput itself rejects inputs that contain unquoted
+               // shell operator characters. (Quoted shell characters are fine.)
+
+               for _, c := range b {
+                       if c > unicode.MaxASCII {
+                               t.Skipf("skipping %#q: contains a non-ASCII character %q", b, c)
+                       }
+                       if !unicode.IsGraphic(rune(c)) && !unicode.IsSpace(rune(c)) {
+                               t.Skipf("skipping %#q: contains non-graphic character %q", b, c)
+                       }
+               }
+
+               args, err := splitPkgConfigOutput(b)
+               if err != nil {
+                       // We haven't checked that the shell would actually reject this input too,
+                       // but if splitPkgConfigOutput rejected it it's probably too dangerous to
+                       // run in the script.
+                       t.Logf("%#q: %v", b, err)
+                       return
+               }
+               t.Logf("splitPkgConfigOutput(%#q) = %#q", b, args)
+               if len(args) == 0 {
+                       t.Skipf("skipping %#q: contains no arguments", b)
+               }
+
+               var buf strings.Builder
+               for _, arg := range args {
+                       buf.WriteString(arg)
+                       buf.WriteString("\n")
+               }
+               wantOut := buf.String()
+
+               if strings.Count(wantOut, "\n") != len(args)+bytes.Count(b, []byte("\n")) {
+                       // One of the newlines in b was treated as a delimiter and not part of an
+                       // argument. Our bash test script would interpret that as a syntax error.
+                       t.Skipf("skipping %#q: contains a bare newline", b)
+               }
+
+               // We use the printf shell command to echo the arguments because, per
+               // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/echo.html#tag_20_37_16:
+               // “It is not possible to use echo portably across all POSIX systems unless
+               // both -n (as the first argument) and escape sequences are omitted.”
+               cmd := testenv.Command(t, "/bin/sh", "-c", "printf '%s\n' "+string(b))
+               cmd.Env = append(cmd.Environ(), "LC_ALL=POSIX", "POSIXLY_CORRECT=1")
+               cmd.Stderr = new(strings.Builder)
+               out, err := cmd.Output()
+               if err != nil {
+                       t.Fatalf("%#q: %v\n%s", cmd.Args, err, cmd.Stderr)
+               }
+
+               if string(out) != wantOut {
+                       t.Logf("%#q:\n%#q", cmd.Args, out)
+                       t.Logf("want:\n%#q", wantOut)
+                       t.Errorf("parsed args do not match")
+               }
+       })
+}