From 7c019c62fb32db42e946b15763217518a521404e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 8 Mar 2023 12:49:10 +0100 Subject: [PATCH] syscall: use clone3 syscall with CLONE_NEWTIME CLONE_NEWTIME can only be used with the clone3 and unshare system calls, see https://github.com/torvalds/linux/commit/769071ac9f20b6a447410c7eaa55d1a5233ef40c: > All available clone flags have been used, so CLONE_NEWTIME uses the highest > bit of CSIGNAL. It means that it can be used only with the unshare() and > the clone3() system calls. The clone3 syscall was added in Linux kernel version 5.3 and CLONE_NEWTIME was added in version 5.6. However, it was non-functional until version 6.3 (and stable versions with the corresponding fix [1]). [1] https://lore.kernel.org/lkml/20230308105126.10107-1-tklauser@distanz.ch/ In case CLONE_NEWTIME is set in SysProcAttr.Cloneflags on an unsupported kernel version, the fork/exec call will fail. Fixes #49779 Change-Id: Ic3ecfc2b601bafaab12b1805d7f9512955a8c7e2 Reviewed-on: https://go-review.googlesource.com/c/go/+/474356 TryBot-Result: Gopher Robot Run-TryBot: Tobias Klauser Reviewed-by: Ian Lance Taylor Auto-Submit: Tobias Klauser Reviewed-by: Cherry Mui --- src/syscall/exec_linux.go | 5 ++++ src/syscall/exec_linux_test.go | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index a8eb4bf927..d5f00dd33e 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -293,6 +293,11 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att exitSignal: uint64(SIGCHLD), cgroup: uint64(sys.CgroupFD), } + } else if flags&CLONE_NEWTIME != 0 { + clone3 = &cloneArgs{ + flags: uint64(flags), + exitSignal: uint64(SIGCHLD), + } } // About to call fork. diff --git a/src/syscall/exec_linux_test.go b/src/syscall/exec_linux_test.go index e0aa3fe996..06b9495be2 100644 --- a/src/syscall/exec_linux_test.go +++ b/src/syscall/exec_linux_test.go @@ -487,6 +487,52 @@ func TestUseCgroupFDHelper(*testing.T) { fmt.Print(string(selfCg)) } +func TestCloneTimeNamespace(t *testing.T) { + testenv.MustHaveExec(t) + + if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" { + timens, err := os.Readlink("/proc/self/ns/time") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + fmt.Print(string(timens)) + os.Exit(0) + } + + exe, err := os.Executable() + if err != nil { + t.Fatal(err) + } + + cmd := testenv.Command(t, exe, "-test.run=TestCloneTimeNamespace") + cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWTIME, + } + out, err := cmd.CombinedOutput() + if err != nil { + if isNotSupported(err) { + // CLONE_NEWTIME does not appear to be supported. + t.Skipf("skipping, CLONE_NEWTIME not supported: %v", err) + } + t.Fatalf("Cmd failed with err %v, output: %s", err, out) + } + + // Inode numer of the time namespaces should be different. + // Based on https://man7.org/linux/man-pages/man7/time_namespaces.7.html#EXAMPLES + timens, err := os.Readlink("/proc/self/ns/time") + if err != nil { + t.Fatal(err) + } + + parentTimeNS := string(timens) + childTimeNS := string(out) + if childTimeNS == parentTimeNS { + t.Fatalf("expected child time namespace to be different from parent time namespace: %s", parentTimeNS) + } +} + type capHeader struct { version uint32 pid int32 -- 2.48.1