//
// This must not be used in parallel tests, as it affects the entire process.
func InCgroupV2(t *testing.T, fn func(*CgroupV2)) {
- mount, rel := findCurrent(t)
- parent := findOwnedParent(t, mount, rel)
- orig := filepath.Join(mount, rel)
+ orig := findCurrent(t)
+ parent := findOwnedParent(t, orig)
// Make sure the parent allows children to control cpu.
b, err := os.ReadFile(filepath.Join(parent, "cgroup.subtree_control"))
fn(c)
}
-// Returns the mount and relative directory of the current cgroup the process
-// is in.
-func findCurrent(t *testing.T) (string, string) {
+// Returns the filesystem path to the current cgroup the process is in.
+func findCurrent(t *testing.T) string {
// Find the path to our current CPU cgroup. Currently this package is
// only used for CPU cgroup testing, so the distinction of different
// controllers doesn't matter.
var scratch [cgroup.ParseSize]byte
buf := make([]byte, cgroup.PathSize)
- n, err := cgroup.FindCPUMountPoint(buf, scratch[:])
+ n, ver, err := cgroup.FindCPU(buf, scratch[:])
if err != nil {
t.Skipf("cgroup: unable to find current cgroup mount: %v", err)
}
- mount := string(buf[:n])
-
- n, ver, err := cgroup.FindCPURelativePath(buf, scratch[:])
- if err != nil {
- t.Skipf("cgroup: unable to find current cgroup path: %v", err)
- }
if ver != cgroup.V2 {
t.Skipf("cgroup: running on cgroup v%d want v2", ver)
}
- rel := string(buf[1:n]) // The returned path always starts with /, skip it.
- rel = filepath.Join(".", rel) // Make sure this isn't empty string at root.
- return mount, rel
+ return string(buf[:n])
}
// Returns a parent directory in which we can create our own cgroup subdirectory.
-func findOwnedParent(t *testing.T, mount, rel string) string {
+func findOwnedParent(t *testing.T, orig string) string {
// There are many ways cgroups may be set up on a system. We don't try
// to cover all of them, just common ones.
//
// We want to create our own subdirectory that we can migrate into and
// then manipulate at will. It is tempting to create a new subdirectory
- // inside the current cgroup we are already in, however that will likey
+ // inside the current cgroup we are already in, however that will likely
// not work. cgroup v2 only allows processes to be in leaf cgroups. Our
// current cgroup likely contains multiple processes (at least this one
// and the cmd/go test runner). If we make a subdirectory and try to
// is empty. As far as I can tell, the only purpose of this is to allow
// reorganizing processes into a new set of subdirectories and then
// adding controllers once done.
- root, err := os.OpenRoot(mount)
+ var stat syscall.Stat_t
+ err := syscall.Stat(orig, &stat)
if err != nil {
- t.Fatalf("error opening cgroup mount root: %v", err)
+ t.Fatalf("error stating orig cgroup: %v", err)
}
uid := os.Getuid()
var prev string
- for rel != "." {
- fi, err := root.Stat(rel)
+ cur := filepath.Dir(orig)
+ for cur != "/" {
+ var curStat syscall.Stat_t
+ err = syscall.Stat(cur, &curStat)
if err != nil {
t.Fatalf("error stating cgroup path: %v", err)
}
- st := fi.Sys().(*syscall.Stat_t)
- if int(st.Uid) != uid {
- // Stop at first directory we don't own.
+ if int(curStat.Uid) != uid || curStat.Dev != stat.Dev {
+ // Stop at first directory we don't own or filesystem boundary.
break
}
- prev = rel
- rel = filepath.Join(rel, "..")
+ prev = cur
+ cur = filepath.Dir(cur)
}
if prev == "" {
}
// We actually want the last directory where we were the owner.
- return filepath.Join(mount, prev)
+ return prev
}
// Migrate the current process to the cgroup directory dst.
return float64(quota) / float64(period), true, nil
}
-// Finds the path of the current process's CPU cgroup relative to the cgroup
-// mount and writes it to out.
+// Finds the path of the current process's CPU cgroup and writes it to out.
//
+// fd is a file descriptor for /proc/self/cgroup.
// Returns the number of bytes written and the cgroup version (1 or 2).
-func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
+func parseCPUCgroup(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
// The format of each line is
//
// hierarchy-ID:controller-list:cgroup-path
//
// controller-list is comma-separated.
- // See man 5 cgroup for more details.
//
// cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that
// is the CPU controller. Otherwise the v2 hierarchy (if any) is the
- // CPU controller.
+ // CPU controller. It is not possible to mount the same controller
+ // simultaneously under both the v1 and the v2 hierarchies.
+ //
+ // See man 7 cgroups for more details.
//
// hierarchy-ID and controller-list have relatively small maximum
// sizes, and the path can be up to _PATH_MAX, so we need a bit more
// hierarchy-ID:controller-list:cgroup-path
//
// controller-list is comma-separated.
- // See man 5 cgroup for more details.
+ // See man 7 cgroups for more details.
i := bytealg.IndexByte(line, ':')
if i < 0 {
return 0, 0, errMalformedFile
line = line[i+1:]
path := line
+ if len(path) == 0 || path[0] != '/' {
+ // We rely on this when composing the full path.
+ return 0, 0, errMalformedFile
+ }
+ if len(path) > len(out) {
+ // Should not be possible. If we really get a very long cgroup path,
+ // reading /proc/self/cgroup will fail with ENAMETOOLONG.
+ return 0, 0, errPathTooLong
+ }
if string(hierarchy) == "0" {
// v2 hierarchy.
return false
}
-// Returns the mount point for the cpu cgroup controller (v1 or v2) from
-// /proc/self/mountinfo.
-func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) {
+// Returns the path to the specified cgroup and version with the cpu controller.
+//
+// fd is a file descriptor for /proc/self/mountinfo.
+// Returns the number of bytes written.
+func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out, cgroup []byte, version Version, scratch []byte) (int, error) {
// The format of each line is:
//
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// carriage return. Those are escaped. See Linux show_mountinfo ->
// show_path. We must unescape before returning.
//
- // We return the mount point (5) if the filesystem type (9) is cgroup2,
- // or cgroup with "cpu" in the super options (11).
+ // A mount point matches if the filesystem type (9) is cgroup2,
+ // or cgroup with "cpu" in the super options (11),
+ // and the cgroup is in the root (4). If there are multiple matches,
+ // the first one is selected.
+ //
+ // We return the full cgroup path, which is the mount point (5) +
+ // cgroup parameter without the root (4) prefix.
//
// (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
// small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
l := newLineReader(fd, scratch, read)
- // Bytes written to out.
- n := 0
-
for {
- //incomplete := false
err := l.next()
if err == errIncompleteLine {
// An incomplete line is fine as long as it doesn't
line := l.line()
- // Skip first four fields.
- for range 4 {
+ // Skip first three fields.
+ for range 3 {
i := bytealg.IndexByte(line, ' ')
if i < 0 {
return 0, errMalformedFile
line = line[i+1:]
}
- // (5) mount point: mount point relative to the process's root
+ // (4) root: root of the mount within the filesystem
i := bytealg.IndexByte(line, ' ')
if i < 0 {
return 0, errMalformedFile
}
+ root := line[:i]
+ if len(root) == 0 || root[0] != '/' {
+ // We rely on this in hasPathPrefix.
+ return 0, errMalformedFile
+ }
+ line = line[i+1:]
+
+ // (5) mount point: mount point relative to the process's root
+ i = bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
mnt := line[:i]
line = line[i+1:]
ftype := line[:i]
line = line[i+1:]
- if string(ftype) != "cgroup" && string(ftype) != "cgroup2" {
- continue
- }
+ switch version {
+ case V1:
+ if string(ftype) != "cgroup" {
+ continue
+ }
+ // (10) mount source: filesystem specific information or "none"
+ i = bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ // Don't care about mount source.
+ line = line[i+1:]
- // As in findCPUPath, cgroup v1 with a CPU controller takes
- // precendence over cgroup v2.
- if string(ftype) == "cgroup2" {
- // v2 hierarchy.
- n, err = unescapePath(out, mnt)
- if err != nil {
- // Don't keep searching on error. The kernel
- // should never produce broken escaping.
- return n, err
+ // (11) super options: per super block options
+ if !containsCPU(line) {
+ continue
}
- // Keep searching, we might find a v1 hierarchy with a
- // CPU controller, which takes precedence.
- continue
+ case V2:
+ if string(ftype) != "cgroup2" {
+ continue
+ }
+ default:
+ throw("impossible cgroup version")
+ panic("unreachable")
}
- // (10) mount source: filesystem specific information or "none"
- i = bytealg.IndexByte(line, ' ')
- if i < 0 {
- return 0, errMalformedFile
+ // Check cgroup is in the root.
+ // If the cgroup is /sandbox/container, the matching mount point root could be
+ // /sandbox/container, /sandbox, or /
+ rootLen, err := unescapePath(root, root)
+ if err != nil {
+ return 0, err
+ }
+ root = root[:rootLen]
+ if !hasPathPrefix(cgroup, root) {
+ continue // not matched, this is not the mount point we're looking for
}
- // Don't care about mount source.
- line = line[i+1:]
- // (11) super options: per super block options
- superOpt := line
+ // Cut off the root from cgroup, ensure rel starts with '/' or is empty.
+ rel := cgroup[rootLen:]
+ if rootLen == 1 && len(cgroup) > 1 {
+ // root is "/", but cgroup is not. Keep full cgroup path.
+ rel = cgroup
+ }
+ if hasPathPrefix(rel, []byte("/..")) {
+ // The cgroup is outside the current cgroup namespace, and this mount
+ // point cannot reach that cgroup.
+ //
+ // e.g. If the process is in cgroup /init, but in a cgroup namespace
+ // rooted at /sandbox/container, /proc/self/cgroup will show /../../init.
+ // We can reach it if the mount point root is /../.. or /../../init,
+ // but not if it is /.. or /.
+ // While a mount point with root /../../.. should be able to reach the
+ // cgroup, we don't know the path to the cgroup within that mount point.
+ continue
+ }
- // v1 hierarchy
- if containsCPU(superOpt) {
- // Found a v1 CPU controller. This must be the
- // only one, so we're done.
- return unescapePath(out, mnt)
+ // All conditions met, compose the full path.
+ // Copy rel to the correct place first, it may overlap with out.
+ n := unescapedLen(mnt)
+ if n+len(rel) > len(out) {
+ return 0, errPathTooLong
+ }
+ copy(out[n:], rel)
+ n2, err := unescapePath(out[:n], mnt)
+ if err != nil {
+ return 0, err
+ }
+ if n2 != n {
+ throw("wrong unescaped len")
}
+ return n + len(rel), nil
}
- if n == 0 {
- // Found nothing.
- return 0, ErrNoCgroup
- }
+ // Found nothing.
+ return 0, ErrNoCgroup
+}
- return n, nil
+// hasPathPrefix reports whether prefix is a path prefix of p. The match must
+// end on a path component boundary: p either equals prefix or continues with
+// '/' right after it.
+//
+// A prefix of length 1 is taken to be the root "/" without looking at its
+// byte, so it matches any p. parseCPUMount verifies the leading '/' of the
+// mount root before calling.
+func hasPathPrefix(p, prefix []byte) bool {
+ i := len(prefix)
+ if i == 1 {
+ return true // root contains everything
+ }
+ if len(p) < i || !bytealg.Equal(prefix, p[:i]) {
+ return false
+ }
+ return len(p) == i || p[i] == '/' // must match at path boundary
}
-var errInvalidEscape error = stringError("invalid path escape sequence")
+var (
+ // errInvalidEscape is returned by unescapePath when its input cannot be
+ // unescaped into the provided output.
+ errInvalidEscape error = stringError("invalid path escape sequence")
+ // errPathTooLong is returned when a cgroup path does not fit in the
+ // caller-provided output buffer.
+ errPathTooLong error = stringError("path too long")
+)
+
+// unescapedLen returns the number of bytes unescapePath produces for in,
+// assuming every '\' in in starts a well-formed four-byte octal escape
+// (such as "\040") that decodes to a single byte.
+//
+// For malformed input containing more backslashes than complete escapes the
+// raw estimate would be negative. It is clamped to zero so that callers can
+// safely use the result as a slice bound; unescapePath then rejects such
+// input with errInvalidEscape instead of the caller panicking on a negative
+// index.
+func unescapedLen(in []byte) int {
+ return max(0, len(in)-bytealg.Count(in, byte('\\'))*3)
+}
// unescapePath copies in to out, unescaping escape sequences generated by
// Linux's show_path.
// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
// like '\040' for space.
//
-// out must be at least as large as in.
+// Caller must ensure that out has at least unescapedLen(in) bytes.
+// in and out may alias; in-place unescaping is supported.
//
// Returns the number of bytes written to out.
//
// Also see escapePath in cgroup_linux_test.go.
func unescapePath(out []byte, in []byte) (int, error) {
- // Not strictly necessary, but simplifies the implementation and will
- // always hold in users.
- if len(out) < len(in) {
- throw("output too small")
- }
-
var outi, ini int
for ini < len(in) {
+ if outi >= len(out) {
+ // given that caller already ensured out is long enough, this
+ // is only possible if there are malformed escape sequences
+ // we have not parsed yet.
+ return outi, errInvalidEscape
+ }
c := in[ini]
if c != '\\' {
out[outi] = c
checkBufferSize(scratch, ParseSize)
// The cgroup path is <cgroup mount point> + <relative path>.
- //
- // This is racy if our cgroup is changed while this runs. For example,
- // initially there is only a cgroup v2 mount and we are not in a
- // cgroup. After, there a cgroup v1 mount with a CPU controller and we
- // are placed in a cgroup in this hierarchy. In that case, findCPUMount
- // could pick the v2 mount, and findCPURelativePath could find the v2
- // relative path.
- //
- // In this case we'll later fail to read the cgroup files and fall back
- // to assuming no cgroup.
+ // The relative path is the cgroup path relative to the mount root.
- n, err := FindCPUMountPoint(out, scratch)
+ n, version, err := FindCPUCgroup(out, scratch)
if err != nil {
return 0, 0, err
}
- // The relative path always starts with /, so we can directly append it
- // to the mount point.
- n2, version, err := FindCPURelativePath(out[n:], scratch)
- if err != nil {
- return 0, 0, err
- }
- n += n2
-
- return n, version, nil
+ n, err = FindCPUMountPoint(out, out[:n], version, scratch)
+ return n, version, err
}
-// FindCPURelativePath finds the path to the CPU cgroup that this process is a member of
-// relative to the root of the cgroup mount and places it in out. scratch is a
-// scratch buffer for internal use.
+// FindCPUCgroup finds the path to the CPU cgroup that this process is a member of
+// and places it in out. scratch is a scratch buffer for internal use.
//
-// out must have length PathSize minus the size of the cgroup mount root (if
-// known). scratch must have length ParseSize.
+// out must have length PathSize. scratch must have length ParseSize.
//
// Returns the number of bytes written to out and the cgroup version (1 or 2).
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup.
-func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
+func FindCPUCgroup(out []byte, scratch []byte) (int, Version, error) {
path := []byte("/proc/self/cgroup\x00")
fd, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
if errno == linux.ENOENT {
// The relative path always starts with /, so we can directly append it
// to the mount point.
- n, version, err := parseCPURelativePath(fd, linux.Read, out[:], scratch)
+ n, version, err := parseCPUCgroup(fd, linux.Read, out[:], scratch)
if err != nil {
linux.Close(fd)
return 0, 0, err
return n, version, nil
}
-// FindCPUMountPoint finds the root of the CPU cgroup mount places it in out.
+// FindCPUMountPoint finds the mount point containing the specified cgroup and
+// version with the cpu controller, and composes the full path to the cgroup in out.
// scratch is a scratch buffer for internal use.
//
-// out must have length PathSize. scratch must have length ParseSize.
+// out must have length PathSize and may overlap with cgroup.
+// scratch must have length ParseSize.
//
// Returns the number of bytes written to out.
//
-// Returns ErrNoCgroup if the process is not in a CPU cgroup.
-func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
+// Returns ErrNoCgroup if no matching mount point is found.
+func FindCPUMountPoint(out, cgroup []byte, version Version, scratch []byte) (int, error) {
checkBufferSize(out, PathSize)
checkBufferSize(scratch, ParseSize)
return 0, errSyscallFailed
}
- n, err := parseCPUMount(fd, linux.Read, out, scratch)
+ n, err := parseCPUMount(fd, linux.Read, out, cgroup, version, scratch)
if err != nil {
linux.Close(fd)
return 0, err
"testing"
)
-const _PATH_MAX = 4096
-
func TestParseV1Number(t *testing.T) {
tests := []struct {
name string
}
}
-func TestParseCPURelativePath(t *testing.T) {
+// readString returns a read function, in the form the parsers under test
+// accept, that serves contents from an in-memory reader. The fd argument is
+// ignored.
+//
+// io.EOF is reported as a zero errno, so the end of the data is visible only
+// through the byte count. Any other error is reported as a non-zero dummy
+// errno.
+func readString(contents string) func(fd int, b []byte) (int, uintptr) {
+ r := strings.NewReader(contents)
+ return func(fd int, b []byte) (int, uintptr) {
+ n, err := r.Read(b)
+ if err != nil && err != io.EOF {
+ const dummyErrno = 42
+ return n, dummyErrno
+ }
+ return n, 0
+ }
+}
+
+func TestParseCPUCgroup(t *testing.T) {
+ veryLongPathName := strings.Repeat("a", cgroup.PathSize+10)
+ evenLongerPathName := strings.Repeat("a", cgroup.ParseSize+10)
+
tests := []struct {
name string
contents string
contents: "",
wantErr: true,
},
+ {
+ name: "too-long",
+ contents: "0::/" + veryLongPathName + "\n",
+ wantErr: true,
+ },
+ {
+ name: "too-long-line",
+ contents: "0::/" + evenLongerPathName + "\n",
+ wantErr: true,
+ },
{
name: "v1",
contents: `2:cpu,cpuacct:/a/b/cpu
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
- r := strings.NewReader(tc.contents)
- read := func(fd int, b []byte) (int, uintptr) {
- n, err := r.Read(b)
- if err != nil && err != io.EOF {
- const dummyErrno = 42
- return n, dummyErrno
- }
- return n, 0
- }
-
var got [cgroup.PathSize]byte
var scratch [cgroup.ParseSize]byte
- n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:])
+ n, gotVer, err := cgroup.ParseCPUCgroup(0, readString(tc.contents), got[:], scratch[:])
if (err != nil) != tc.wantErr {
t.Fatalf("parseCPURelativePath got err %v want %v", err, tc.wantErr)
}
}
}
+// TestParseCPUCgroupMalformed checks that ParseCPUCgroup returns
+// ErrMalformedFile for /proc/self/cgroup contents whose single line is
+// missing fields or whose path does not start with '/'.
+func TestParseCPUCgroupMalformed(t *testing.T) {
+ for _, contents := range []string{
+ "\n",
+ "0\n",
+ "0:\n",
+ "0::\n",
+ "0::a\n",
+ } {
+ t.Run("", func(t *testing.T) {
+ var got [cgroup.PathSize]byte
+ var scratch [cgroup.ParseSize]byte
+ n, v, err := cgroup.ParseCPUCgroup(0, readString(contents), got[:], scratch[:])
+ if err != cgroup.ErrMalformedFile {
+ t.Errorf("ParseCPUCgroup got %q (v%d), %v, want ErrMalformedFile", string(got[:n]), v, err)
+ }
+ })
+ }
+}
+
func TestContainsCPU(t *testing.T) {
tests := []struct {
in string
overlayLongLowerDir += fmt.Sprintf(":%s%d", lowerPath, i)
}
+ var longPath [4090]byte
+ for i := range longPath {
+ longPath[i] = byte(i)
+ }
+ escapedLongPath := escapePath(string(longPath[:]))
+ if len(escapedLongPath) <= cgroup.PathSize {
+ // Ensure we actually support escaped paths longer than PathSize.
+ t.Fatalf("escapedLongPath is too short to test")
+ }
+
tests := []struct {
name string
contents string
+ cgroup string
+ version cgroup.Version
want string
wantErr bool
}{
contents: "",
wantErr: true,
},
+ {
+ name: "invalid-root",
+ contents: "56 22 0:40 /\\1 /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct\n",
+ cgroup: "/",
+ version: cgroup.V1,
+ wantErr: true,
+ },
+ {
+ name: "invalid-mount",
+ contents: "56 22 0:40 / /sys/fs/cgroup/\\1 rw - cgroup cgroup rw,cpu,cpuacct\n",
+ cgroup: "/",
+ version: cgroup.V1,
+ wantErr: true,
+ },
{
name: "v1",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
- want: "/sys/fs/cgroup/cpu",
+ cgroup: "/",
+ version: cgroup.V1,
+ want: "/sys/fs/cgroup/cpu",
},
{
name: "v2",
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
- want: "/sys/fs/cgroup",
+ cgroup: "/",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup",
},
{
name: "mixed",
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
- want: "/sys/fs/cgroup/cpu",
+ cgroup: "/",
+ version: cgroup.V1,
+ want: "/sys/fs/cgroup/cpu",
+ },
+ {
+ name: "mixed-choose-v2",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+ cgroup: "/",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup",
},
{
name: "v2-escaped",
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
- want: `/sys/fs/cgroup/tab tab`,
+ cgroup: "/",
+ version: cgroup.V2,
+ want: `/sys/fs/cgroup/tab tab`,
},
{
// Overly long line on a different mount doesn't matter.
262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
- want: "/sys/fs/cgroup",
+ cgroup: "/",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup",
+ },
+ {
+ name: "long-escaped-path",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+ cgroup: "/",
+ version: cgroup.V2,
+ want: "/sys/" + string(longPath[:]),
+ },
+ {
+ name: "too-long-escaped-path",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+ cgroup: "/container", // compared to above, this makes the path too long
+ version: cgroup.V2,
+ wantErr: true,
+ },
+ {
+ name: "non-root_mount",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 /sand /unrelated/cgroup1 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+25 21 0:22 /stone /unrelated/cgroup2 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+25 21 0:22 /sandbox/container/group /sys/fs/cgroup/mygroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+25 21 0:22 / /ignored/second/match rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+ cgroup: "/sandbox/container",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup/container",
+ },
+ {
+ name: "v2-escaped-root",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 /tab\011tab /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+ cgroup: "/tab tab/container",
+ version: cgroup.V2,
+ want: `/sys/fs/cgroup/container`,
+ },
+ {
+ name: "non-root_cgroup",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+ cgroup: "/sandbox/container",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup/sandbox/container",
+ },
+ {
+ name: "mixed_non-root",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+49 22 0:37 /sandbox /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 /sandbox /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 /sand /unrelated/cgroup1 rw - cgroup cgroup rw,cpu,cpuacct
+56 22 0:40 /stone /unrelated/cgroup2 rw - cgroup cgroup rw,cpu,cpuacct
+56 22 0:40 /sandbox /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+56 22 0:40 /sandbox/container/group /sys/fs/cgroup/cpu/mygroup rw - cgroup cgroup rw,cpu,cpuacct
+56 22 0:40 / /ignored/second/match rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 /sandbox /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 /sandbox /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+ cgroup: "/sandbox/container",
+ version: cgroup.V1,
+ want: "/sys/fs/cgroup/cpu/container",
+ },
+ {
+ // to see an example of this, for a PID in a cgroup namespace, run:
+ // nsenter -t <PID> -C -- cat /proc/self/cgroup
+ // nsenter -t <PID> -C -- grep cgroup /proc/self/mountinfo
+ // /mnt can be generated with `mount --bind /sys/fs/cgroup/kubepods.slice /mnt`,
+ // assuming PID is in cgroup /kubepods.slice
+ name: "out_of_namespace",
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw
+29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`,
+ cgroup: "/../../../../init.scope",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup/init.scope",
+ },
+ {
+ name: "out_of_namespace-root", // the process is directly in the root cgroup
+ contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw
+29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`,
+ cgroup: "/../../../..",
+ version: cgroup.V2,
+ want: "/sys/fs/cgroup",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
- r := strings.NewReader(tc.contents)
- read := func(fd int, b []byte) (int, uintptr) {
- n, err := r.Read(b)
- if err != nil && err != io.EOF {
- const dummyErrno = 42
- return n, dummyErrno
- }
- return n, 0
- }
-
var got [cgroup.PathSize]byte
var scratch [cgroup.ParseSize]byte
- n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:])
+ n := copy(got[:], tc.cgroup)
+ n, err := cgroup.ParseCPUMount(0, readString(tc.contents), got[:],
+ got[:n], tc.version, scratch[:])
if (err != nil) != tc.wantErr {
t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr)
}
}
}
+// TestParseCPUMountMalformed checks that ParseCPUMount returns
+// ErrMalformedFile for mountinfo lines that are cut short or whose root field
+// does not start with '/'. Every input is parsed looking for cgroup "/" on a
+// v1 hierarchy.
+func TestParseCPUMountMalformed(t *testing.T) {
+ for _, contents := range []string{
+ "\n",
+ "22\n",
+ "22 1 8:1\n",
+ "22 1 8:1 /\n",
+ "22 1 8:1 / /cgroup\n",
+ "22 1 8:1 / /cgroup rw\n",
+ "22 1 8:1 / /cgroup rw -\n",
+ "22 1 8:1 / /cgroup rw - \n",
+ "22 1 8:1 / /cgroup rw - cgroup\n",
+ "22 1 8:1 / /cgroup rw - cgroup cgroup\n",
+ "22 1 8:1 a /cgroup rw - cgroup cgroup cpu\n",
+ } {
+ t.Run("", func(t *testing.T) {
+ var got [cgroup.PathSize]byte
+ var scratch [cgroup.ParseSize]byte
+ n, err := cgroup.ParseCPUMount(0, readString(contents), got[:], []byte("/"), cgroup.V1, scratch[:])
+ if err != cgroup.ErrMalformedFile {
+ t.Errorf("parseCPUMount got %q, %v, want ErrMalformedFile", string(got[:n]), err)
+ }
+ })
+ }
+}
+
// escapePath performs escaping equivalent to Linux's show_path.
//
// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
t.Run("unescapePath", func(t *testing.T) {
for _, tc := range tests {
- t.Run(tc.name, func(t *testing.T) {
- in := []byte(tc.escaped)
- out := make([]byte, len(in))
+ runTest := func(in, out []byte) {
n, err := cgroup.UnescapePath(out, in)
if err != nil {
t.Errorf("unescapePath got err %v want nil", err)
if got != tc.unescaped {
t.Errorf("unescapePath got %q want %q", got, tc.escaped)
}
+ }
+ t.Run(tc.name, func(t *testing.T) {
+ in := []byte(tc.escaped)
+ out := make([]byte, len(in))
+ runTest(in, out)
+ })
+ t.Run("inplace/"+tc.name, func(t *testing.T) {
+ in := []byte(tc.escaped)
+ runTest(in, in)
})
}
})
var (
ErrEOF = errEOF
ErrIncompleteLine = errIncompleteLine
+ ErrMalformedFile = errMalformedFile
)
var ContainsCPU = containsCPU
var ParseV1Number = parseV1Number
var ParseV2Limit = parseV2Limit
-var ParseCPURelativePath = parseCPURelativePath
+var ParseCPUCgroup = parseCPUCgroup
var ParseCPUMount = parseCPUMount
var UnescapePath = unescapePath