--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup
+
+import (
+ "internal/bytealg"
+ "internal/runtime/strconv"
+ "internal/runtime/syscall"
+)
+
+var (
+ // ErrNoCgroup is returned when the process is not in a CPU cgroup.
+ ErrNoCgroup error = stringError("not in a cgroup")
+
+ // errMalformedFile is returned when a cgroup or procfs file does not
+ // have the expected format.
+ errMalformedFile error = stringError("malformed file")
+)
+
+// _PATH_MAX is the path length bound used to size the path and parse
+// buffers below.
+const _PATH_MAX = 4096
+
+const (
+ // Required amount of scratch space for OpenCPU.
+ //
+ // TODO(prattmic): This is shockingly large (~70KiB) due to the (very
+ // unlikely) combination of extremely long paths consisting mostly of
+ // escaped characters. The scratch buffer ends up in .bss in package
+ // runtime, so it doesn't contribute to binary size and generally won't
+ // be faulted in, but it would still be nice to shrink this. A more
+ // complex parser that did not need to keep entire lines in memory
+ // could get away with much less. Alternatively, we could do a one-off
+ // mmap allocation for this buffer, which is only mapped larger if we
+ // actually need the extra space.
+ ScratchSize = PathSize + ParseSize
+
+ // Required space to store a path of the cgroup in the filesystem.
+ PathSize = _PATH_MAX
+
+ // /proc/self/mountinfo path escape sequences are 4 characters long, so
+ // a path consisting entirely of escaped characters could be 4 times
+ // larger.
+ escapedPathMax = 4 * _PATH_MAX
+
+ // Required space to parse /proc/self/mountinfo and /proc/self/cgroup.
+ // See parseCPUMount and parseCPURelativePath.
+ ParseSize = 4 * escapedPathMax
+)
+
+// Names of the CPU limit files, appended by OpenCPU to the cgroup directory
+// path before opening them.
+//
+// Include explicit NUL to be sure we include it in the slice.
+const (
+ v2MaxFile = "/cpu.max\x00"
+ v1QuotaFile = "/cpu.cfs_quota_us\x00"
+ v1PeriodFile = "/cpu.cfs_period_us\x00"
+)
+
+// Version indicates the cgroup version.
+type Version int
+
+const (
+ // VersionUnknown is the zero value of Version.
+ VersionUnknown Version = iota
+ // V1 is cgroup v1.
+ V1
+ // V2 is cgroup v2.
+ V2
+)
+
+// CPU owns the FDs required to read the CPU limit from a cgroup.
+//
+// Values are created by OpenCPU and released with Close.
+type CPU struct {
+ // version selects which of the FDs below are in use.
+ version Version
+
+ // For cgroup v1, this is cpu.cfs_quota_us.
+ // For cgroup v2, this is cpu.max.
+ quotaFD int
+
+ // For cgroup v1, this is cpu.cfs_period_us.
+ // For cgroup v2, this is unused (OpenCPU sets it to -1).
+ periodFD int
+}
+
+// Close closes the file descriptors owned by c.
+//
+// It throws if c was not produced for cgroup v1 or v2.
+func (c CPU) Close() {
+	if c.version != V1 && c.version != V2 {
+		throw("impossible cgroup version")
+		return
+	}
+
+	// Both versions own quotaFD; only v1 has a separate period file.
+	syscall.Close(c.quotaFD)
+	if c.version == V1 {
+		syscall.Close(c.periodFD)
+	}
+}
+
+// checkBufferSize throws unless s is exactly size bytes long.
+func checkBufferSize(s []byte, size int) {
+	got := len(s)
+	if got == size {
+		return
+	}
+	println("runtime: cgroup buffer length", got, "want", size)
+	throw("runtime: cgroup invalid buffer length")
+}
+
+// OpenCPU returns a CPU for the CPU cgroup containing the current process, or
+// ErrNoCgroup if the process is not in a CPU cgroup.
+//
+// scratch must have length ScratchSize.
+//
+// On success the caller owns the returned CPU and must call Close on it. On
+// failure no file descriptors remain open.
+func OpenCPU(scratch []byte) (CPU, error) {
+	checkBufferSize(scratch, ScratchSize)
+
+	// The first PathSize bytes hold the cgroup directory path; the rest is
+	// parse scratch space for FindCPU.
+	base := scratch[:PathSize]
+	scratch2 := scratch[PathSize:]
+
+	n, version, err := FindCPU(base, scratch2)
+	if err != nil {
+		return CPU{}, err
+	}
+
+	switch version {
+	case V1:
+		quotaFD, err := openCPUFile(base, n, v1QuotaFile)
+		if err != nil {
+			return CPU{}, err
+		}
+
+		periodFD, err := openCPUFile(base, n, v1PeriodFile)
+		if err != nil {
+			// Don't leak the quota FD opened above.
+			syscall.Close(quotaFD)
+			return CPU{}, err
+		}
+
+		c := CPU{
+			version:  V1,
+			quotaFD:  quotaFD,
+			periodFD: periodFD,
+		}
+		return c, nil
+	case V2:
+		maxFD, err := openCPUFile(base, n, v2MaxFile)
+		if err != nil {
+			return CPU{}, err
+		}
+
+		c := CPU{
+			version:  V2,
+			quotaFD:  maxFD,
+			periodFD: -1,
+		}
+		return c, nil
+	default:
+		throw("impossible cgroup version")
+		panic("unreachable")
+	}
+}
+
+// openCPUFile appends the NUL-terminated file name to the cgroup directory
+// path stored in base[:n] and opens the result read-only.
+//
+// It returns errSyscallFailed if the name does not fit in base or if the open
+// fails.
+func openCPUFile(base []byte, n int, name string) (int, error) {
+	n2 := copy(base[n:], name)
+	if n2 != len(name) {
+		// The name (including its trailing NUL) was truncated, so the
+		// path would not be NUL-terminated. Treat this like a failed
+		// open of an over-long path.
+		return -1, errSyscallFailed
+	}
+
+	path := base[:n+n2]
+	fd, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	if errno != 0 {
+		// This may fail if this process was migrated out of
+		// the cgroup found by FindCPU and that cgroup has been
+		// deleted.
+		return -1, errSyscallFailed
+	}
+	return fd, nil
+}
+
+// ReadCPULimit returns the average CPU throughput limit from the cgroup,
+// computed as quota divided by period, or ok false if there is no limit.
+//
+// It returns an error if a limit file cannot be read or does not have the
+// expected contents, including a period that is not positive.
+func ReadCPULimit(c CPU) (float64, bool, error) {
+	switch c.version {
+	case V1:
+		quota, err := readV1Number(c.quotaFD)
+		if err != nil {
+			// Propagate the underlying error so a failed read is
+			// not reported as a malformed file.
+			return 0, false, err
+		}
+
+		if quota < 0 {
+			// No limit.
+			return 0, false, nil
+		}
+
+		period, err := readV1Number(c.periodFD)
+		if err != nil {
+			return 0, false, err
+		}
+		if period <= 0 {
+			// Dividing by zero would produce +Inf or NaN.
+			return 0, false, errMalformedFile
+		}
+
+		return float64(quota) / float64(period), true, nil
+	case V2:
+		// quotaFD is the cpu.max FD.
+		return readV2Limit(c.quotaFD)
+	default:
+		throw("impossible cgroup version")
+		panic("unreachable")
+	}
+}
+
+// readV1Number reads and parses the value in the v1 quota or period file
+// open at fd.
+func readV1Number(fd int) (int64, error) {
+	// The file contains "<value>\n", where value is int64 microseconds
+	// (the quota may be -1, meaning no limit).
+	//
+	// MaxInt64 takes 19 bytes in base 10, so 19 + 1 (newline) = 20 bytes
+	// is a conservative maximum file size. The buffer is larger for good
+	// measure.
+	var storage [64]byte
+
+	// Read at offset 0 every time so that we always see a fresh value.
+	n, errno := syscall.Pread(fd, storage[:], 0)
+	switch {
+	case errno != 0:
+		return 0, errSyscallFailed
+	case n == len(storage):
+		// Filled the whole buffer; the file is larger than expected.
+		return 0, errMalformedFile
+	}
+
+	return parseV1Number(storage[:n])
+}
+
+// parseV1Number parses the contents of a v1 quota or period file: a base 10
+// int64 terminated by a newline.
+func parseV1Number(buf []byte) (int64, error) {
+	// The value is everything before the (required) newline.
+	end := bytealg.IndexByte(buf, '\n')
+	if end < 0 {
+		return 0, errMalformedFile
+	}
+
+	if val, ok := strconv.Atoi64(string(buf[:end])); ok {
+		return val, nil
+	}
+	return 0, errMalformedFile
+}
+
+// readV2Limit reads the cpu.max file open at fd and returns the CPU
+// throughput limit, or ok false if there is no limit.
+func readV2Limit(fd int) (float64, bool, error) {
+	// The file contains "<quota> <period>\n". Both values are in
+	// microseconds, and quota may be "max", meaning no limit.
+	//
+	// The kernel is inconsistent about whether the values are uint64 or
+	// int64: it parses them as uint64 but prints them as int64. See
+	// kernel/sched/core.c:cpu_max_{show,write}.
+	//
+	// In practice the kernel caps the period at 1s (1000000us) (see
+	// max_cfs_quota_period) and the quota at (1<<44)us (see
+	// max_cfs_runtime), so the values never get large enough for the
+	// distinction to matter.
+	//
+	// MaxInt64 takes 19 bytes in base 10, so 19 + 19 + 1 (space) + 1
+	// (newline) = 40 bytes is a conservative maximum file size. The
+	// buffer is larger for good measure.
+	var storage [64]byte
+
+	// Read at offset 0 every time so that we always see a fresh value.
+	n, errno := syscall.Pread(fd, storage[:], 0)
+	switch {
+	case errno != 0:
+		return 0, false, errSyscallFailed
+	case n == len(storage):
+		// Filled the whole buffer; the file is larger than expected.
+		return 0, false, errMalformedFile
+	}
+
+	return parseV2Limit(storage[:n])
+}
+
+// parseV2Limit parses the contents of a cgroup v2 cpu.max file, which has the
+// form "<quota> <period>\n".
+//
+// It returns quota divided by period and ok true, or ok false if quota is
+// "max" (no limit). It returns errMalformedFile if the contents do not have
+// the expected form or if period is not positive.
+func parseV2Limit(buf []byte) (float64, bool, error) {
+	i := bytealg.IndexByte(buf, ' ')
+	if i < 0 {
+		return 0, false, errMalformedFile
+	}
+
+	quotaStr := buf[:i]
+	// The compiler does not allocate for this string conversion.
+	if string(quotaStr) == "max" {
+		// No limit. The period is not inspected in this case.
+		return 0, false, nil
+	}
+
+	periodStr := buf[i+1:]
+	// The period must be terminated by a newline.
+	i = bytealg.IndexByte(periodStr, '\n')
+	if i < 0 {
+		return 0, false, errMalformedFile
+	}
+	periodStr = periodStr[:i]
+
+	quota, ok := strconv.Atoi64(string(quotaStr))
+	if !ok {
+		return 0, false, errMalformedFile
+	}
+
+	period, ok := strconv.Atoi64(string(periodStr))
+	if !ok {
+		return 0, false, errMalformedFile
+	}
+	if period <= 0 {
+		// Dividing by zero would produce +Inf or NaN.
+		return 0, false, errMalformedFile
+	}
+
+	return float64(quota) / float64(period), true, nil
+}
+
+// FindCPU finds the path to the CPU cgroup that this process is a member of
+// and places it in out. scratch is a scratch buffer for internal use.
+//
+// out must have length PathSize. scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out and the cgroup version (1 or 2).
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPU(out []byte, scratch []byte) (int, Version, error) {
+	checkBufferSize(out, PathSize)
+	checkBufferSize(scratch, ParseSize)
+
+	// The full path is <cgroup mount point> + <relative path>, found in
+	// two separate steps.
+	//
+	// The two steps race with changes to our cgroup. For example, suppose
+	// that at first only a cgroup v2 mount exists and we are not in a
+	// cgroup, and that afterwards a cgroup v1 mount with a CPU controller
+	// appears and we are placed in a cgroup in that hierarchy. Then
+	// FindCPUMountPoint could pick the v2 mount while FindCPURelativePath
+	// finds the v2 relative path.
+	//
+	// When that happens, reading the cgroup files fails later and we fall
+	// back to assuming no cgroup.
+
+	mountLen, err := FindCPUMountPoint(out, scratch)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// The relative path always begins with '/', so it can be written
+	// directly after the mount point.
+	relLen, version, err := FindCPURelativePath(out[mountLen:], scratch)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	return mountLen + relLen, version, nil
+}
+
+// FindCPURelativePath finds the path to the CPU cgroup that this process is a
+// member of, relative to the root of the cgroup mount, and places it in out.
+// scratch is a scratch buffer for internal use.
+//
+// out must have length PathSize minus the size of the cgroup mount root (if
+// known). scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out and the cgroup version (1 or 2).
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
+	procCgroup := []byte("/proc/self/cgroup\x00")
+	fd, errno := syscall.Open(&procCgroup[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	switch {
+	case errno == syscall.ENOENT:
+		return 0, 0, ErrNoCgroup
+	case errno != 0:
+		return 0, 0, errSyscallFailed
+	}
+
+	// The FD is only needed while parsing; close it on every path.
+	n, version, err := parseCPURelativePath(fd, syscall.Read, out, scratch)
+	syscall.Close(fd)
+	if err != nil {
+		return 0, 0, err
+	}
+	return n, version, nil
+}
+
+// parseCPURelativePath finds the path of the current process's CPU cgroup
+// relative to the cgroup mount and writes it to out.
+//
+// Lines of /proc/self/cgroup are read from fd via read, using scratch as the
+// line buffer.
+//
+// Returns the number of bytes written and the cgroup version (1 or 2).
+//
+// Returns ErrNoCgroup if no CPU cgroup is found, and errMalformedFile if a
+// line does not have the expected format or if the selected path does not fit
+// in out.
+func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
+	// The format of each line is
+	//
+	//   hierarchy-ID:controller-list:cgroup-path
+	//
+	// controller-list is comma-separated.
+	// See man 5 cgroup for more details.
+	//
+	// cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that
+	// is the CPU controller. Otherwise the v2 hierarchy (if any) is the
+	// CPU controller.
+	//
+	// hierarchy-ID and controller-list have relatively small maximum
+	// sizes, and the path can be up to _PATH_MAX, so we need a bit more
+	// than 1 _PATH_MAX of scratch space.
+
+	l := newLineReader(fd, scratch, read)
+
+	// Bytes of the v2 path written to out, if any.
+	n := 0
+
+	// Set if a v2 path was found but did not fit in out.
+	v2TooLong := false
+
+	for {
+		err := l.next()
+		if err == errEOF {
+			break
+		} else if err != nil {
+			// This includes errIncompleteLine: don't allow
+			// incomplete lines. While in theory the incomplete
+			// line may be for a controller we don't care about, in
+			// practice all lines should be of similar length, so
+			// we should just have a buffer big enough for any.
+			return 0, 0, err
+		}
+
+		line := l.line()
+
+		i := bytealg.IndexByte(line, ':')
+		if i < 0 {
+			return 0, 0, errMalformedFile
+		}
+
+		hierarchy := line[:i]
+		line = line[i+1:]
+
+		i = bytealg.IndexByte(line, ':')
+		if i < 0 {
+			return 0, 0, errMalformedFile
+		}
+
+		controllers := line[:i]
+		path := line[i+1:]
+
+		if string(hierarchy) == "0" {
+			// v2 hierarchy. Keep searching afterwards, we might
+			// find a v1 hierarchy with a CPU controller, which
+			// takes precedence.
+			if len(path) > len(out) {
+				// Remember the overflow rather than storing a
+				// truncated path; it only matters if no v1
+				// CPU controller is found.
+				v2TooLong = true
+			} else {
+				n = copy(out, path)
+			}
+			continue
+		}
+
+		// v1 hierarchy
+		if containsCPU(controllers) {
+			// Found a v1 CPU controller. This must be the
+			// only one, so we're done.
+			if len(path) > len(out) {
+				// copy would silently truncate the path.
+				return 0, 0, errMalformedFile
+			}
+			return copy(out, path), V1, nil
+		}
+	}
+
+	if v2TooLong {
+		return 0, 0, errMalformedFile
+	}
+
+	if n == 0 {
+		// Found nothing.
+		return 0, 0, ErrNoCgroup
+	}
+
+	// Must be v2, v1 returns above.
+	return n, V2, nil
+}
+
+// containsCPU reports whether the comma-separated list b has an element that
+// is exactly "cpu".
+func containsCPU(b []byte) bool {
+	remaining := b
+	for {
+		if len(remaining) == 0 {
+			return false
+		}
+
+		comma := bytealg.IndexByte(remaining, ',')
+		if comma < 0 {
+			// Last element of the list.
+			//
+			// Neither cmd/compile nor gccgo allocates for these string conversions.
+			return string(remaining) == "cpu"
+		}
+
+		if string(remaining[:comma]) == "cpu" {
+			return true
+		}
+		remaining = remaining[comma+1:]
+	}
+}
+
+// FindCPUMountPoint finds the root of the CPU cgroup mount and places it in
+// out. scratch is a scratch buffer for internal use.
+//
+// out must have length PathSize. scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out.
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
+	checkBufferSize(out, PathSize)
+	checkBufferSize(scratch, ParseSize)
+
+	mountinfo := []byte("/proc/self/mountinfo\x00")
+	fd, errno := syscall.Open(&mountinfo[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	switch {
+	case errno == syscall.ENOENT:
+		return 0, ErrNoCgroup
+	case errno != 0:
+		return 0, errSyscallFailed
+	}
+
+	// The FD is only needed while parsing; close it on every path.
+	n, err := parseCPUMount(fd, syscall.Read, out, scratch)
+	syscall.Close(fd)
+	if err != nil {
+		return 0, err
+	}
+	return n, nil
+}
+
+// parseCPUMount returns the mount point for the cpu cgroup controller (v1 or
+// v2) from /proc/self/mountinfo.
+//
+// Lines are read from fd via read, using scratch as the line buffer. The
+// unescaped mount point is written to out and the number of bytes written is
+// returned. Returns ErrNoCgroup if no matching mount is found.
+func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) {
+ // The format of each line is:
+ //
+ // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+ // (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+ //
+ // (1) mount ID: unique identifier of the mount (may be reused after umount)
+ // (2) parent ID: ID of parent (or of self for the top of the mount tree)
+ // (3) major:minor: value of st_dev for files on filesystem
+ // (4) root: root of the mount within the filesystem
+ // (5) mount point: mount point relative to the process's root
+ // (6) mount options: per mount options
+ // (7) optional fields: zero or more fields of the form "tag[:value]"
+ // (8) separator: marks the end of the optional fields
+ // (9) filesystem type: name of filesystem of the form "type[.subtype]"
+ // (10) mount source: filesystem specific information or "none"
+ // (11) super options: per super block options
+ //
+ // See man 5 proc_pid_mountinfo for more details.
+ //
+ // Note that emitted paths will not contain space, tab, newline, or
+ // carriage return. Those are escaped. See Linux show_mountinfo ->
+ // show_path. We must unescape before returning.
+ //
+ // We return the mount point (5) if the filesystem type (9) is cgroup2,
+ // or cgroup with "cpu" in the super options (11).
+ //
+ // (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
+ // small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
+ // Note that non-cgroup mounts may have arbitrarily long (11), but we
+ // can skip those when parsing.
+
+ l := newLineReader(fd, scratch, read)
+
+ // Bytes written to out.
+ n := 0
+
+ for {
+ err := l.next()
+ if err == errIncompleteLine {
+ // An incomplete line is fine as long as it doesn't
+ // impede parsing the fields we need. It shouldn't be
+ // possible for any mount to use more than 3*PATH_MAX
+ // before (9) because there are two paths and all other
+ // earlier fields have bounded options. Only (11) has
+ // unbounded options.
+ } else if err == errEOF {
+ break
+ } else if err != nil {
+ return 0, err
+ }
+
+ line := l.line()
+
+ // Skip first four fields.
+ for range 4 {
+ i := bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ line = line[i+1:]
+ }
+
+ // (5) mount point: mount point relative to the process's root
+ i := bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ mnt := line[:i]
+ line = line[i+1:]
+
+ // Skip ahead past optional fields, delimited by " - ".
+ for {
+ i = bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ // The separator must be followed by at least one more byte
+ // (the start of the filesystem type).
+ if i+3 >= len(line) {
+ return 0, errMalformedFile
+ }
+ delim := line[i : i+3]
+ if string(delim) == " - " {
+ line = line[i+3:]
+ break
+ }
+ line = line[i+1:]
+ }
+
+ // (9) filesystem type: name of filesystem of the form "type[.subtype]"
+ i = bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ ftype := line[:i]
+ line = line[i+1:]
+
+ if string(ftype) != "cgroup" && string(ftype) != "cgroup2" {
+ continue
+ }
+
+ // As in parseCPURelativePath, cgroup v1 with a CPU controller
+ // takes precedence over cgroup v2.
+ if string(ftype) == "cgroup2" {
+ // v2 hierarchy.
+ n, err = unescapePath(out, mnt)
+ if err != nil {
+ // Don't keep searching on error. The kernel
+ // should never produce broken escaping.
+ return n, err
+ }
+ // Keep searching, we might find a v1 hierarchy with a
+ // CPU controller, which takes precedence.
+ continue
+ }
+
+ // (10) mount source: filesystem specific information or "none"
+ i = bytealg.IndexByte(line, ' ')
+ if i < 0 {
+ return 0, errMalformedFile
+ }
+ // Don't care about mount source.
+ line = line[i+1:]
+
+ // (11) super options: per super block options
+ superOpt := line
+
+ // v1 hierarchy
+ if containsCPU(superOpt) {
+ // Found a v1 CPU controller. This must be the
+ // only one, so we're done.
+ return unescapePath(out, mnt)
+ }
+ }
+
+ if n == 0 {
+ // Found nothing.
+ return 0, ErrNoCgroup
+ }
+
+ return n, nil
+}
+
+var (
+	// errInvalidEscape is returned by unescapePath for a truncated or
+	// non-octal escape sequence.
+	errInvalidEscape error = stringError("invalid path escape sequence")
+
+	// errOutputTooSmall is returned by unescapePath when the unescaped
+	// path does not fit in the output buffer.
+	errOutputTooSmall error = stringError("output too small")
+)
+
+// unescapePath copies in to out, unescaping escape sequences generated by
+// Linux's show_path.
+//
+// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
+// like '\040' for space.
+//
+// out only needs to be large enough for the unescaped path, which may be
+// shorter than in. If it is not, errOutputTooSmall is returned.
+//
+// Returns the number of bytes written to out. On error, this is the number of
+// bytes written before the error was detected.
+//
+// Also see escapePath in cgroup_linux_test.go.
+func unescapePath(out []byte, in []byte) (int, error) {
+	var outi, ini int
+	for ini < len(in) {
+		c := in[ini]
+		width := 1
+
+		if c == '\\' {
+			// Start of escape sequence.
+
+			// Escape sequence is always 4 characters: one slash
+			// and three octal digits.
+			if ini+3 >= len(in) {
+				return outi, errInvalidEscape
+			}
+
+			d0, d1, d2 := in[ini+1], in[ini+2], in[ini+3]
+			// The first digit is limited to 3 so that the value
+			// fits in a byte (at most \377).
+			if d0 < '0' || d0 > '3' || d1 < '0' || d1 > '7' || d2 < '0' || d2 > '7' {
+				return outi, errInvalidEscape
+			}
+
+			c = (d0-'0')<<6 | (d1-'0')<<3 | (d2 - '0')
+			width = 4
+		}
+
+		if outi >= len(out) {
+			return outi, errOutputTooSmall
+		}
+		out[outi] = c
+		outi++
+		ini += width
+	}
+
+	return outi, nil
+}
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup_test
+
+import (
+ "fmt"
+ "internal/runtime/cgroup"
+ "io"
+ "strconv"
+ "strings"
+ "testing"
+)
+
+// _PATH_MAX has the same value as the unexported constant of the same name in
+// package cgroup.
+const _PATH_MAX = 4096
+
+func TestParseV1Number(t *testing.T) {
+ tests := []struct {
+ name string
+ contents string
+ want int64
+ wantErr bool
+ }{
+ {
+ name: "disabled",
+ contents: "-1\n",
+ want: -1,
+ },
+ {
+ name: "500000",
+ contents: "500000\n",
+ want: 500000,
+ },
+ {
+ name: "MaxInt64",
+ contents: "9223372036854775807\n",
+ want: 9223372036854775807,
+ },
+ {
+ name: "missing-newline",
+ contents: "500000",
+ wantErr: true,
+ },
+ {
+ name: "not-a-number",
+ contents: "123max\n",
+ wantErr: true,
+ },
+ {
+ name: "v2",
+ contents: "1000 5000\n",
+ wantErr: true,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ got, err := cgroup.ParseV1Number([]byte(tc.contents))
+ if tc.wantErr {
+ if err == nil {
+ t.Fatalf("parseV1Number got err nil want non-nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatalf("parseV1Number got err %v want nil", err)
+ }
+
+ if got != tc.want {
+ t.Errorf("parseV1Number got %d want %d", got, tc.want)
+ }
+ })
+ }
+}
+
+// TestParseV2Limit checks parsing of cgroup v2 cpu.max file contents.
+func TestParseV2Limit(t *testing.T) {
+	tests := []struct {
+		name     string
+		contents string
+		want     float64
+		wantOK   bool
+		wantErr  bool
+	}{
+		{name: "disabled", contents: "max 100000\n", wantOK: false},
+		{name: "5", contents: "500000 100000\n", want: 5, wantOK: true},
+		{name: "0.5", contents: "50000 100000\n", want: 0.5, wantOK: true},
+		{name: "2.5", contents: "250000 100000\n", want: 2.5, wantOK: true},
+		{name: "MaxInt64", contents: "9223372036854775807 9223372036854775807\n", want: 1, wantOK: true},
+		{name: "missing-newline", contents: "500000 100000", wantErr: true},
+		{name: "v1", contents: "500000\n", wantErr: true},
+		{name: "quota-not-a-number", contents: "500000us 100000\n", wantErr: true},
+		{name: "period-not-a-number", contents: "500000 100000us\n", wantErr: true},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, gotOK, err := cgroup.ParseV2Limit([]byte(tc.contents))
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("parseV2Limit got err nil want non-nil")
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("parseV2Limit got err %v want nil", err)
+			}
+
+			if gotOK != tc.wantOK {
+				t.Errorf("parseV2Limit got ok %v want %v", gotOK, tc.wantOK)
+			}
+
+			// The value is only meaningful when a limit is expected.
+			if tc.wantOK && got != tc.want {
+				t.Errorf("parseV2Limit got %f want %f", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestParseCPURelativePath checks selection of the CPU cgroup path and
+// version from /proc/self/cgroup contents.
+func TestParseCPURelativePath(t *testing.T) {
+	type testCase struct {
+		name     string
+		contents string
+		want     string
+		wantVer  cgroup.Version
+		wantErr  bool
+	}
+	cases := []testCase{
+		{
+			name:     "empty",
+			contents: "",
+			wantErr:  true,
+		},
+		{
+			name:     "v1",
+			contents: "2:cpu,cpuacct:/a/b/cpu\n1:blkio:/a/b/blkio\n",
+			want:     "/a/b/cpu",
+			wantVer:  cgroup.V1,
+		},
+		{
+			name:     "v2",
+			contents: "0::/a/b/c\n",
+			want:     "/a/b/c",
+			wantVer:  cgroup.V2,
+		},
+		{
+			name:     "mixed",
+			contents: "2:cpu,cpuacct:/a/b/cpu\n1:blkio:/a/b/blkio\n0::/a/b/v2\n",
+			want:     "/a/b/cpu",
+			wantVer:  cgroup.V1,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			src := strings.NewReader(tt.contents)
+			// Fake read syscall backed by src; io.EOF is reported
+			// as a zero-length read rather than an errno.
+			read := func(fd int, b []byte) (int, uintptr) {
+				n, err := src.Read(b)
+				if err == nil || err == io.EOF {
+					return n, 0
+				}
+				const dummyErrno = 42
+				return n, dummyErrno
+			}
+
+			var (
+				got     [cgroup.PathSize]byte
+				scratch [cgroup.ParseSize]byte
+			)
+			n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:])
+			if gotErr := err != nil; gotErr != tt.wantErr {
+				t.Fatalf("parseCPURelativePath got err %v want %v", err, tt.wantErr)
+			}
+
+			if gotVer != tt.wantVer {
+				t.Errorf("parseCPURelativePath got cgroup version %d want %d", gotVer, tt.wantVer)
+			}
+
+			if gotPath := string(got[:n]); gotPath != tt.want {
+				t.Errorf("parseCPURelativePath got %q want %q", gotPath, tt.want)
+			}
+		})
+	}
+}
+
+func TestContainsCPU(t *testing.T) {
+ tests := []struct {
+ in string
+ want bool
+ }{
+ {
+ in: "",
+ want: false,
+ },
+ {
+ in: ",",
+ want: false,
+ },
+ {
+ in: "cpu",
+ want: true,
+ },
+ {
+ in: "memory,cpu",
+ want: true,
+ },
+ {
+ in: "cpu,memory",
+ want: true,
+ },
+ {
+ in: "memory,cpu,block",
+ want: true,
+ },
+ {
+ in: "memory,cpuacct,block",
+ want: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.in, func(t *testing.T) {
+ got := cgroup.ContainsCPU([]byte(tc.in))
+ if got != tc.want {
+ t.Errorf("containsCPU(%q) got %v want %v", tc.in, got, tc.want)
+ }
+ })
+ }
+}
+
+// TestParseCPUMount checks selection and unescaping of the CPU cgroup mount
+// point from /proc/self/mountinfo contents.
+func TestParseCPUMount(t *testing.T) {
+	// Used for v2-longline. We want an overlayfs mount to have an option
+	// so long that the entire line can't possibly fit in the scratch
+	// buffer.
+	const lowerPath = "/so/many/overlay/layers"
+	var lowerDir strings.Builder
+	lowerDir.WriteString(lowerPath)
+	for i := 0; lowerDir.Len() < cgroup.ScratchSize; i++ {
+		fmt.Fprintf(&lowerDir, ":%s%d", lowerPath, i)
+	}
+	overlayLongLowerDir := lowerDir.String()
+
+	tests := []struct {
+		name     string
+		contents string
+		want     string
+		wantErr  bool
+	}{
+		{
+			name:     "empty",
+			contents: "",
+			wantErr:  true,
+		},
+		{
+			name: "v1",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+			want: "/sys/fs/cgroup/cpu",
+		},
+		{
+			name: "v2",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			want: "/sys/fs/cgroup",
+		},
+		{
+			name: "mixed",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+			want: "/sys/fs/cgroup/cpu",
+		},
+		{
+			name: "v2-escaped",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			// \011 is a tab. Spell it with an escape so the expected
+			// value does not depend on invisible whitespace.
+			want: "/sys/fs/cgroup/tab\ttab",
+		},
+		{
+			// Overly long line on a different mount doesn't matter.
+			name: "v2-longline",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			want: "/sys/fs/cgroup",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			r := strings.NewReader(tc.contents)
+			// Fake read syscall backed by r; io.EOF is reported as
+			// a zero-length read rather than an errno.
+			read := func(fd int, b []byte) (int, uintptr) {
+				n, err := r.Read(b)
+				if err != nil && err != io.EOF {
+					const dummyErrno = 42
+					return n, dummyErrno
+				}
+				return n, 0
+			}
+
+			var got [cgroup.PathSize]byte
+			var scratch [cgroup.ParseSize]byte
+			n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:])
+			if (err != nil) != tc.wantErr {
+				t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr)
+			}
+
+			if string(got[:n]) != tc.want {
+				t.Errorf("parseCPUMount got %q want %q", string(got[:n]), tc.want)
+			}
+		})
+	}
+}
+
+// escapePath performs escaping equivalent to Linux's show_path.
+//
+// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
+// like '\040' for space.
+func escapePath(s string) string {
+	escaped := make([]rune, 0, len(s))
+	for _, r := range s {
+		if r != '\\' && r != ' ' && r != '\t' && r != '\n' {
+			escaped = append(escaped, r)
+			continue
+		}
+
+		// Emit a backslash followed by the octal value, zero-padded to
+		// three digits.
+		octal := strconv.FormatInt(int64(r), 8)
+		escaped = append(escaped, '\\')
+		for pad := len(octal); pad < 3; pad++ {
+			escaped = append(escaped, '0')
+		}
+		escaped = append(escaped, []rune(octal)...)
+	}
+	return string(escaped)
+}
+
+// TestEscapePath checks that escapePath and cgroup.UnescapePath are inverses
+// on a shared table of unescaped/escaped path pairs.
+func TestEscapePath(t *testing.T) {
+	tests := []struct {
+		name      string
+		unescaped string
+		escaped   string
+	}{
+		{
+			name:      "boring",
+			unescaped: `/a/b/c`,
+			escaped:   `/a/b/c`,
+		},
+		{
+			name:      "space",
+			unescaped: `/a/b b/c`,
+			escaped:   `/a/b\040b/c`,
+		},
+		{
+			// Spelled with an escape so the case does not depend on
+			// invisible whitespace in the source.
+			name:      "tab",
+			unescaped: "/a/b\tb/c",
+			escaped:   `/a/b\011b/c`,
+		},
+		{
+			name:      "newline",
+			unescaped: "/a/b\nb/c",
+			escaped:   `/a/b\012b/c`,
+		},
+		{
+			name:      "slash",
+			unescaped: `/a/b\b/c`,
+			escaped:   `/a/b\134b/c`,
+		},
+		{
+			name:      "beginning",
+			unescaped: `\b/c`,
+			escaped:   `\134b/c`,
+		},
+		{
+			name:      "ending",
+			unescaped: `/a/\`,
+			escaped:   `/a/\134`,
+		},
+	}
+
+	t.Run("escapePath", func(t *testing.T) {
+		for _, tc := range tests {
+			t.Run(tc.name, func(t *testing.T) {
+				got := escapePath(tc.unescaped)
+				if got != tc.escaped {
+					t.Errorf("escapePath got %q want %q", got, tc.escaped)
+				}
+			})
+		}
+	})
+
+	t.Run("unescapePath", func(t *testing.T) {
+		for _, tc := range tests {
+			t.Run(tc.name, func(t *testing.T) {
+				in := []byte(tc.escaped)
+				out := make([]byte, len(in))
+				n, err := cgroup.UnescapePath(out, in)
+				if err != nil {
+					// The output is meaningless after an error.
+					t.Fatalf("unescapePath got err %v want nil", err)
+				}
+				got := string(out[:n])
+				if got != tc.unescaped {
+					t.Errorf("unescapePath got %q want %q", got, tc.unescaped)
+				}
+			})
+		}
+	})
+}