From 8aee0b8b32c103da416369ebd16e2c07ad99badf Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Wed, 17 May 2017 02:05:32 -0700 Subject: [PATCH] syscall: add AmbientCaps to linux SysProcAttr Fixes #19713 Change-Id: Id1ca61b35bca2a4bea23dd64c7fb001a3a14fd88 Reviewed-on: https://go-review.googlesource.com/43512 Reviewed-by: Ian Lance Taylor --- src/syscall/exec_linux.go | 22 +++++ src/syscall/exec_linux_test.go | 159 +++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index 5cbabf7896..fd87b86ff8 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -40,6 +40,7 @@ type SysProcAttr struct { // This parameter is no-op if GidMappings == nil. Otherwise for unprivileged // users this should be set to false for mappings work. GidMappingsEnableSetgroups bool + AmbientCaps []uintptr // Ambient capabilities (Linux only) } var ( @@ -101,6 +102,12 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr //go:noinline //go:norace func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) { + // Defined in linux/prctl.h starting with Linux 4.3. + const ( + PR_CAP_AMBIENT = 0x2f + PR_CAP_AMBIENT_RAISE = 0x2 + ) + // vfork requires that the child not touch any of the parent's // active stack frames. Hence, the child does all post-fork // processing in this stack frame and never returns, while the @@ -165,6 +172,14 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att runtime_AfterForkInChild() + // Enable the "keep capabilities" flag to set ambient capabilities later. + if len(sys.AmbientCaps) > 0 { + _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0) + if err1 != 0 { + goto childerror + } + } + // Wait for User ID/Group ID mappings to be written. if sys.UidMappings != nil || sys.GidMappings != nil { if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 { @@ -279,6 +294,13 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att } } + for _, c := range sys.AmbientCaps { + _, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0) + if err1 != 0 { + goto childerror + } + } + // Chdir if dir != nil { _, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0) diff --git a/src/syscall/exec_linux_test.go b/src/syscall/exec_linux_test.go index 7a88968b49..854060b38f 100644 --- a/src/syscall/exec_linux_test.go +++ b/src/syscall/exec_linux_test.go @@ -9,13 +9,17 @@ package syscall_test import ( "flag" "fmt" + "io" "io/ioutil" "os" "os/exec" + "os/user" "path/filepath" + "strconv" "strings" "syscall" "testing" + "unsafe" ) // Check if we are in a chroot by checking if the inode of / is @@ -382,3 +386,158 @@ func TestUnshareMountNameSpaceChroot(t *testing.T) { t.Errorf("rmdir failed on %v: %v", d, err) } } + +type capHeader struct { + version uint32 + pid int +} + +type capData struct { + effective uint32 + permitted uint32 + inheritable uint32 +} + +const CAP_SYS_TIME = 25 + +type caps struct { + hdr capHeader + data [2]capData +} + +func getCaps() (caps, error) { + var c caps + + // Get capability version + if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(nil)), 0); errno != 0 { + return c, fmt.Errorf("SYS_CAPGET: %v", errno) + } + + // Get current capabilities + if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(&c.data[0])), 0); errno != 0 { + return c, fmt.Errorf("SYS_CAPGET: %v", errno) + } + + return c, nil +} + +func mustSupportAmbientCaps(t *testing.T) { + var uname syscall.Utsname + if err := syscall.Uname(&uname); err != nil { + t.Fatalf("Uname: %v", err) + } + var buf [65]byte + for i, b := range uname.Release { + buf[i] = byte(b) + } + ver := string(buf[:]) + if i := strings.Index(ver, "\x00"); i != -1 { + ver = ver[:i] + } + if strings.HasPrefix(ver, "2.") || + strings.HasPrefix(ver, "3.") || + strings.HasPrefix(ver, "4.1.") || + strings.HasPrefix(ver, "4.2.") { + t.Skipf("kernel version %q predates required 4.3; skipping test", ver) + } +} + +// TestAmbientCapsHelper isn't a real test. It's used as a helper process for +// TestAmbientCaps. +func TestAmbientCapsHelper(*testing.T) { + if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { + return + } + defer os.Exit(0) + + caps, err := getCaps() + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + if caps.data[0].effective&(1<