From aa1b50e1793dcbd5a23470bffd983d7c127b6cd3 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 28 Feb 2024 23:25:52 +0000 Subject: [PATCH] runtime: make tidExists more robust The LockThreadExit tests in the runtime have been observed to fail after reading /proc/self/task//stat and blindly assuming its contents followed a specific format. The parsing code is also wrong, because splitting by spaces doesn't work when the comm name contains a space. It also ignores errors without reporting them, which isn't great. This change rewrites tidExists to be more robust by using /proc/self/task//status instead. It also modifies tidExists' signature to report an error to its caller. Its caller then prints that error. Ignoring a non-not-exist error with opening this file is the likely but unconfirmed cause of #65736 (ESRCH). This change also checks for that error explicitly as an optimistic fix. Fixes #65736. Change-Id: Iea560b457d514426da2781b7eb7b8616a91ec23b Reviewed-on: https://go-review.googlesource.com/c/go/+/567938 Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt --- src/runtime/testdata/testprog/lockosthread.go | 6 ++- .../testdata/testprog/syscalls_linux.go | 42 ++++++++++++++++--- .../testdata/testprog/syscalls_none.go | 4 +- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/runtime/testdata/testprog/lockosthread.go b/src/runtime/testdata/testprog/lockosthread.go index 90d98e4972..63470635e7 100644 --- a/src/runtime/testdata/testprog/lockosthread.go +++ b/src/runtime/testdata/testprog/lockosthread.go @@ -90,7 +90,11 @@ func LockOSThreadAlt() { println("locked thread reused") os.Exit(1) } - exists, supported := tidExists(subTID) + exists, supported, err := tidExists(subTID) + if err != nil { + println("error:", err.Error()) + return + } if !supported || !exists { goto ok } diff --git a/src/runtime/testdata/testprog/syscalls_linux.go b/src/runtime/testdata/testprog/syscalls_linux.go index 3939b160df..5bb98d01d5 100644 --- a/src/runtime/testdata/testprog/syscalls_linux.go +++ b/src/runtime/testdata/testprog/syscalls_linux.go @@ -8,6 +8,7 @@ import ( "bytes" "fmt" "internal/testenv" + "io" "os" "syscall" ) @@ -16,14 +17,43 @@ func gettid() int { return syscall.Gettid() } -func tidExists(tid int) (exists, supported bool) { - stat, err := os.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid)) - if os.IsNotExist(err) { - return false, true +func tidExists(tid int) (exists, supported bool, err error) { + // Open the magic proc status file for reading with the syscall package. + // We want to identify certain valid errors very precisely. + statusFile := fmt.Sprintf("/proc/self/task/%d/status", tid) + fd, err := syscall.Open(statusFile, syscall.O_RDONLY, 0) + if errno, ok := err.(syscall.Errno); ok { + if errno == syscall.ENOENT || errno == syscall.ESRCH { + return false, true, nil + } + } + if err != nil { + return false, false, err + } + status, err := io.ReadAll(os.NewFile(uintptr(fd), statusFile)) + if err != nil { + return false, false, err + } + lines := bytes.Split(status, []byte{'\n'}) + // Find the State line. + stateLineIdx := -1 + for i, line := range lines { + if bytes.HasPrefix(line, []byte("State:")) { + stateLineIdx = i + break + } + } + if stateLineIdx < 0 { + // Malformed status file? + return false, false, fmt.Errorf("unexpected status file format: %s:\n%s", statusFile, status) + } + stateLine := bytes.SplitN(lines[stateLineIdx], []byte{':'}, 2) + if len(stateLine) != 2 { + // Malformed status file? + return false, false, fmt.Errorf("unexpected status file format: %s:\n%s", statusFile, status) } // Check if it's a zombie thread. - state := bytes.Fields(stat)[2] - return !(len(state) == 1 && state[0] == 'Z'), true + return !bytes.Contains(stateLine[1], []byte{'Z'}), true, nil } func getcwd() (string, error) { diff --git a/src/runtime/testdata/testprog/syscalls_none.go b/src/runtime/testdata/testprog/syscalls_none.go index 068bb59af3..c4c3740dc0 100644 --- a/src/runtime/testdata/testprog/syscalls_none.go +++ b/src/runtime/testdata/testprog/syscalls_none.go @@ -11,8 +11,8 @@ func gettid() int { return 0 } -func tidExists(tid int) (exists, supported bool) { - return false, false +func tidExists(tid int) (exists, supported bool, err error) { + return false, false, nil } func getcwd() (string, error) { -- 2.50.0