From f12c66fbed546645389cf184b0e2ffd6ad9f78ec Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Tue, 22 Apr 2025 10:24:37 +0000
Subject: [PATCH] internal/runtime/cgroup: CPU cgroup limit discovery

For #73193.

Change-Id: I6a6a636ca9fa9cba429cf053468c56c2939cb1ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/668638
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
---
 src/internal/runtime/cgroup/cgroup_linux.go   | 710 ++++++++++++++++++
 .../runtime/cgroup/cgroup_linux_test.go       | 476 ++++++++++++
 .../runtime/cgroup/export_linux_test.go       |  15 +
 src/internal/runtime/cgroup/runtime.go        |  14 +
 src/runtime/panic.go                          |   5 +
 5 files changed, 1220 insertions(+)
 create mode 100644 src/internal/runtime/cgroup/cgroup_linux.go
 create mode 100644 src/internal/runtime/cgroup/cgroup_linux_test.go
 create mode 100644 src/internal/runtime/cgroup/export_linux_test.go
 create mode 100644 src/internal/runtime/cgroup/runtime.go

diff --git a/src/internal/runtime/cgroup/cgroup_linux.go b/src/internal/runtime/cgroup/cgroup_linux.go
new file mode 100644
index 0000000000..2fc3b225c5
--- /dev/null
+++ b/src/internal/runtime/cgroup/cgroup_linux.go
@@ -0,0 +1,710 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup
+
+import (
+	"internal/bytealg"
+	"internal/runtime/strconv"
+	"internal/runtime/syscall"
+)
+
+var (
+	ErrNoCgroup error = stringError("not in a cgroup")
+
+	errMalformedFile error = stringError("malformed file")
+)
+
+const _PATH_MAX = 4096
+
+const (
+	// Required amount of scratch space for CPULimit.
+	//
+	// TODO(prattmic): This is shockingly large (~70KiB) due to the (very
+	// unlikely) combination of extremely long paths consisting mostly
+	// escaped characters. The scratch buffer ends up in .bss in package
+	// runtime, so it doesn't contribute to binary size and generally won't
+	// be faulted in, but it would still be nice to shrink this. A more
+	// complex parser that did not need to keep entire lines in memory
+	// could get away with much less. Alternatively, we could do a one-off
+	// mmap allocation for this buffer, which is only mapped larger if we
+	// actually need the extra space.
+	ScratchSize = PathSize + ParseSize
+
+	// Required space to store a path of the cgroup in the filesystem.
+	PathSize = _PATH_MAX
+
+	// /proc/self/mountinfo path escape sequences are 4 characters long, so
+	// a path consisting entirely of escaped characters could be 4 times
+	// larger.
+	escapedPathMax = 4 * _PATH_MAX
+
+	// Required space to parse /proc/self/mountinfo and /proc/self/cgroup.
+	// See findCPUMount and findCPURelativePath.
+	ParseSize = 4 * escapedPathMax
+)
+
+// Include explicit NUL to be sure we include it in the slice.
+const (
+	v2MaxFile    = "/cpu.max\x00"
+	v1QuotaFile  = "/cpu.cfs_quota_us\x00"
+	v1PeriodFile = "/cpu.cfs_period_us\x00"
+)
+
+// Version indicates the cgroup version.
+type Version int
+
+const (
+	VersionUnknown Version = iota
+	V1
+	V2
+)
+
+// CPU owns the FDs required to read the CPU limit from a cgroup.
+type CPU struct {
+	version Version
+
+	// For cgroup v1, this is cpu.cfs_quota_us.
+	// For cgroup v2, this is cpu.max.
+	quotaFD int
+
+	// For cgroup v1, this is cpu.cfs_period_us.
+	// For cgroup v2, this is unused.
+	periodFD int
+}
+
+func (c CPU) Close() {
+	switch c.version {
+	case V1:
+		syscall.Close(c.quotaFD)
+		syscall.Close(c.periodFD)
+	case V2:
+		syscall.Close(c.quotaFD)
+	default:
+		throw("impossible cgroup version")
+	}
+}
+
+func checkBufferSize(s []byte, size int) {
+	if len(s) != size {
+		println("runtime: cgroup buffer length", len(s), "want", size)
+		throw("runtime: cgroup invalid buffer length")
+	}
+}
+
+// OpenCPU returns a CPU for the CPU cgroup containing the current process, or
+// ErrNoCgroup if the process is not in a CPU cgroup.
+//
+// scratch must have length ScratchSize.
+func OpenCPU(scratch []byte) (CPU, error) {
+	checkBufferSize(scratch, ScratchSize)
+
+	base := scratch[:PathSize]
+	scratch2 := scratch[PathSize:]
+
+	n, version, err := FindCPU(base, scratch2)
+	if err != nil {
+		return CPU{}, err
+	}
+
+	switch version {
+	case 1:
+		n2 := copy(base[n:], v1QuotaFile)
+		path := base[:n+n2]
+		quotaFD, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+		if errno != 0 {
+			// This may fail if this process was migrated out of
+			// the cgroup found by FindCPU and that cgroup has been
+			// deleted.
+			return CPU{}, errSyscallFailed
+		}
+
+		n2 = copy(base[n:], v1PeriodFile)
+		path = base[:n+n2]
+		periodFD, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+		if errno != 0 {
+			// This may fail if this process was migrated out of
+			// the cgroup found by FindCPU and that cgroup has been
+			// deleted.
+			return CPU{}, errSyscallFailed
+		}
+
+		c := CPU{
+			version:  1,
+			quotaFD:  quotaFD,
+			periodFD: periodFD,
+		}
+		return c, nil
+	case 2:
+		n2 := copy(base[n:], v2MaxFile)
+		path := base[:n+n2]
+		maxFD, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+		if errno != 0 {
+			// This may fail if this process was migrated out of
+			// the cgroup found by FindCPU and that cgroup has been
+			// deleted.
+			return CPU{}, errSyscallFailed
+		}
+
+		c := CPU{
+			version:  2,
+			quotaFD:  maxFD,
+			periodFD: -1,
+		}
+		return c, nil
+	default:
+		throw("impossible cgroup version")
+		panic("unreachable")
+	}
+}
+
+// Returns average CPU throughput limit from the cgroup, or ok false if there
+// is no limit.
+func ReadCPULimit(c CPU) (float64, bool, error) {
+	switch c.version {
+	case 1:
+		quota, err := readV1Number(c.quotaFD)
+		if err != nil {
+			return 0, false, errMalformedFile
+		}
+
+		if quota < 0 {
+			// No limit.
+			return 0, false, nil
+		}
+
+		period, err := readV1Number(c.periodFD)
+		if err != nil {
+			return 0, false, errMalformedFile
+		}
+
+		return float64(quota) / float64(period), true, nil
+	case 2:
+		// quotaFD is the cpu.max FD.
+		return readV2Limit(c.quotaFD)
+	default:
+		throw("impossible cgroup version")
+		panic("unreachable")
+	}
+}
+
+// Returns the value from the quota/period file.
+func readV1Number(fd int) (int64, error) {
+	// The format of the file is "<value>\n" where the value is in
+	// int64 microseconds and, if quota, may be -1 to indicate no limit.
+	//
+	// MaxInt64 requires 19 bytes to display in base 10, thus the
+	// conservative max size of this file is 19 + 1 (newline) = 20 bytes.
+	// We'll provide a bit more for good measure.
+	//
+	// Always read from the beginning of the file to get a fresh value.
+	var b [64]byte
+	n, errno := syscall.Pread(fd, b[:], 0)
+	if errno != 0 {
+		return 0, errSyscallFailed
+	}
+	if n == len(b) {
+		return 0, errMalformedFile
+	}
+
+	buf := b[:n]
+	return parseV1Number(buf)
+}
+
+func parseV1Number(buf []byte) (int64, error) {
+	// Ignore trailing newline.
+	i := bytealg.IndexByte(buf, '\n')
+	if i < 0 {
+		return 0, errMalformedFile
+	}
+	buf = buf[:i]
+
+	val, ok := strconv.Atoi64(string(buf))
+	if !ok {
+		return 0, errMalformedFile
+	}
+
+	return val, nil
+}
+
+// Returns CPU throughput limit, or ok false if there is no limit.
+func readV2Limit(fd int) (float64, bool, error) {
+	// The format of the file is "<quota> <period>\n" where quota and
+	// period are microseconds and quota may be "max" to indicate no limit.
+	//
+	// Note that the kernel is inconsistent about whether the values are
+	// uint64 or int64: values are parsed as uint64 but printed as int64.
+	// See kernel/sched/core.c:cpu_max_{show,write}.
+	//
+	// In practice, the kernel limits the period to 1s (1000000us) (see
+	// max_cfs_quota_period), and the quota to (1<<44)us (see
+	// max_cfs_runtime), so these values can't get large enough for the
+	// distinction to matter.
+	//
+	// MaxInt64 requires 19 bytes to display in base 10, thus the
+	// conservative max size of this file is 19 + 19 + 1 (space) + 1
+	// (newline) = 40 bytes. We'll provide a bit more for good measure.
+	//
+	// Always read from the beginning of the file to get a fresh value.
+	var b [64]byte
+	n, errno := syscall.Pread(fd, b[:], 0)
+	if errno != 0 {
+		return 0, false, errSyscallFailed
+	}
+	if n == len(b) {
+		return 0, false, errMalformedFile
+	}
+
+	buf := b[:n]
+	return parseV2Limit(buf)
+}
+
+func parseV2Limit(buf []byte) (float64, bool, error) {
+	i := bytealg.IndexByte(buf, ' ')
+	if i < 0 {
+		return 0, false, errMalformedFile
+	}
+
+	quotaStr := buf[:i]
+	if bytealg.Compare(quotaStr, []byte("max")) == 0 {
+		// No limit.
+		return 0, false, nil
+	}
+
+	periodStr := buf[i+1:]
+	// Ignore trailing newline, if any.
+	i = bytealg.IndexByte(periodStr, '\n')
+	if i < 0 {
+		return 0, false, errMalformedFile
+	}
+	periodStr = periodStr[:i]
+
+	quota, ok := strconv.Atoi64(string(quotaStr))
+	if !ok {
+		return 0, false, errMalformedFile
+	}
+
+	period, ok := strconv.Atoi64(string(periodStr))
+	if !ok {
+		return 0, false, errMalformedFile
+	}
+
+	return float64(quota) / float64(period), true, nil
+}
+
+// FindCPU finds the path to the CPU cgroup that this process is a member of
+// and places it in out. scratch is a scratch buffer for internal use.
+//
+// out must have length PathSize. scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out and the cgroup version (1 or 2).
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPU(out []byte, scratch []byte) (int, Version, error) {
+	checkBufferSize(out, PathSize)
+	checkBufferSize(scratch, ParseSize)
+
+	// The cgroup path is <cgroup mount point> + <relative path>.
+	//
+	// This is racy if our cgroup is changed while this runs. For example,
+	// initially there is only a cgroup v2 mount and we are not in a
+	// cgroup. After, there a cgroup v1 mount with a CPU controller and we
+	// are placed in a cgroup in this hierarchy. In that case, findCPUMount
+	// could pick the v2 mount, and findCPURelativePath could find the v2
+	// relative path.
+	//
+	// In this case we'll later fail to read the cgroup files and fall back
+	// to assuming no cgroup.
+
+	n, err := FindCPUMountPoint(out, scratch)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// The relative path always starts with /, so we can directly append it
+	// to the mount point.
+	n2, version, err := FindCPURelativePath(out[n:], scratch)
+	if err != nil {
+		return 0, 0, err
+	}
+	n += n2
+
+	return n, version, nil
+}
+
+// FindCPURelativePath finds the path to the CPU cgroup that this process is a member of
+// relative to the root of the cgroup mount and places it in out. scratch is a
+// scratch buffer for internal use.
+//
+// out must have length PathSize minus the size of the cgroup mount root (if
+// known). scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out and the cgroup version (1 or 2).
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
+	path := []byte("/proc/self/cgroup\x00")
+	fd, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	if errno == syscall.ENOENT {
+		return 0, 0, ErrNoCgroup
+	} else if errno != 0 {
+		return 0, 0, errSyscallFailed
+	}
+
+	// The relative path always starts with /, so we can directly append it
+	// to the mount point.
+	n, version, err := parseCPURelativePath(fd, syscall.Read, out[:], scratch)
+	if err != nil {
+		syscall.Close(fd)
+		return 0, 0, err
+	}
+
+	syscall.Close(fd)
+	return n, version, nil
+}
+
+// Finds the path of the current process's CPU cgroup relative to the cgroup
+// mount and writes it to out.
+//
+// Returns the number of bytes written and the cgroup version (1 or 2).
+func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
+	// The format of each line is
+	//
+	//   hierarchy-ID:controller-list:cgroup-path
+	//
+	// controller-list is comma-separated.
+	// See man 5 cgroup for more details.
+	//
+	// cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that
+	// is the CPU controller. Otherwise the v2 hierarchy (if any) is the
+	// CPU controller.
+	//
+	// hierarchy-ID and controller-list have relatively small maximum
+	// sizes, and the path can be up to _PATH_MAX, so we need a bit more
+	// than 1 _PATH_MAX of scratch space.
+
+	l := newLineReader(fd, scratch, read)
+
+	// Bytes written to out.
+	n := 0
+
+	for {
+		err := l.next()
+		if err == errIncompleteLine {
+			// Don't allow incomplete lines. While in theory the
+			// incomplete line may be for a controller we don't
+			// care about, in practice all lines should be of
+			// similar length, so we should just have a buffer big
+			// enough for any.
+			return 0, 0, err
+		} else if err == errEOF {
+			break
+		} else if err != nil {
+			return 0, 0, err
+		}
+
+		line := l.line()
+
+		// The format of each line is
+		//
+		//   hierarchy-ID:controller-list:cgroup-path
+		//
+		// controller-list is comma-separated.
+		// See man 5 cgroup for more details.
+		i := bytealg.IndexByte(line, ':')
+		if i < 0 {
+			return 0, 0, errMalformedFile
+		}
+
+		hierarchy := line[:i]
+		line = line[i+1:]
+
+		i = bytealg.IndexByte(line, ':')
+		if i < 0 {
+			return 0, 0, errMalformedFile
+		}
+
+		controllers := line[:i]
+		line = line[i+1:]
+
+		path := line
+
+		if string(hierarchy) == "0" {
+			// v2 hierarchy.
+			n = copy(out, path)
+			// Keep searching, we might find a v1 hierarchy with a
+			// CPU controller, which takes precedence.
+		} else {
+			// v1 hierarchy
+			if containsCPU(controllers) {
+				// Found a v1 CPU controller. This must be the
+				// only one, so we're done.
+				return copy(out, path), V1, nil
+			}
+		}
+	}
+
+	if n == 0 {
+		// Found nothing.
+		return 0, 0, ErrNoCgroup
+	}
+
+	// Must be v2, v1 returns above.
+	return n, V2, nil
+}
+
+// Returns true if comma-separated list b contains "cpu".
+func containsCPU(b []byte) bool {
+	for len(b) > 0 {
+		i := bytealg.IndexByte(b, ',')
+		if i < 0 {
+			// Neither cmd/compile nor gccgo allocates for these string conversions.
+			return string(b) == "cpu"
+		}
+
+		curr := b[:i]
+		rest := b[i+1:]
+
+		if string(curr) == "cpu" {
+			return true
+		}
+
+		b = rest
+	}
+
+	return false
+}
+
+// FindCPUMountPoint finds the root of the CPU cgroup mount places it in out.
+// scratch is a scratch buffer for internal use.
+//
+// out must have length PathSize. scratch must have length ParseSize.
+//
+// Returns the number of bytes written to out.
+//
+// Returns ErrNoCgroup if the process is not in a CPU cgroup.
+func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
+	checkBufferSize(out, PathSize)
+	checkBufferSize(scratch, ParseSize)
+
+	path := []byte("/proc/self/mountinfo\x00")
+	fd, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	if errno == syscall.ENOENT {
+		return 0, ErrNoCgroup
+	} else if errno != 0 {
+		return 0, errSyscallFailed
+	}
+
+	n, err := parseCPUMount(fd, syscall.Read, out, scratch)
+	if err != nil {
+		syscall.Close(fd)
+		return 0, err
+	}
+	syscall.Close(fd)
+
+	return n, nil
+}
+
+// Returns the mount point for the cpu cgroup controller (v1 or v2) from
+// /proc/self/mountinfo.
+func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) {
+	// The format of each line is:
+	//
+	// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+	// (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+	//
+	// (1) mount ID:  unique identifier of the mount (may be reused after umount)
+	// (2) parent ID:  ID of parent (or of self for the top of the mount tree)
+	// (3) major:minor:  value of st_dev for files on filesystem
+	// (4) root:  root of the mount within the filesystem
+	// (5) mount point:  mount point relative to the process's root
+	// (6) mount options:  per mount options
+	// (7) optional fields:  zero or more fields of the form "tag[:value]"
+	// (8) separator:  marks the end of the optional fields
+	// (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+	// (10) mount source:  filesystem specific information or "none"
+	// (11) super options:  per super block options
+	//
+	// See man 5 proc_pid_mountinfo for more details.
+	//
+	// Note that emitted paths will not contain space, tab, newline, or
+	// carriage return. Those are escaped. See Linux show_mountinfo ->
+	// show_path. We must unescape before returning.
+	//
+	// We return the mount point (5) if the filesystem type (9) is cgroup2,
+	// or cgroup with "cpu" in the super options (11).
+	//
+	// (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
+	// small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
+	// Note that non-cgroup mounts may have arbitrarily long (11), but we
+	// can skip those when parsing.
+
+	l := newLineReader(fd, scratch, read)
+
+	// Bytes written to out.
+	n := 0
+
+	for {
+		//incomplete := false
+		err := l.next()
+		if err == errIncompleteLine {
+			// An incomplete line is fine as long as it doesn't
+			// impede parsing the fields we need. It shouldn't be
+			// possible for any mount to use more than 3*PATH_MAX
+			// before (9) because there are two paths and all other
+			// earlier fields have bounded options. Only (11) has
+			// unbounded options.
+		} else if err == errEOF {
+			break
+		} else if err != nil {
+			return 0, err
+		}
+
+		line := l.line()
+
+		// Skip first four fields.
+		for range 4 {
+			i := bytealg.IndexByte(line, ' ')
+			if i < 0 {
+				return 0, errMalformedFile
+			}
+			line = line[i+1:]
+		}
+
+		// (5) mount point:  mount point relative to the process's root
+		i := bytealg.IndexByte(line, ' ')
+		if i < 0 {
+			return 0, errMalformedFile
+		}
+		mnt := line[:i]
+		line = line[i+1:]
+
+		// Skip ahead past optional fields, delimited by " - ".
+		for {
+			i = bytealg.IndexByte(line, ' ')
+			if i < 0 {
+				return 0, errMalformedFile
+			}
+			if i+3 >= len(line) {
+				return 0, errMalformedFile
+			}
+			delim := line[i : i+3]
+			if string(delim) == " - " {
+				line = line[i+3:]
+				break
+			}
+			line = line[i+1:]
+		}
+
+		// (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+		i = bytealg.IndexByte(line, ' ')
+		if i < 0 {
+			return 0, errMalformedFile
+		}
+		ftype := line[:i]
+		line = line[i+1:]
+
+		if string(ftype) != "cgroup" && string(ftype) != "cgroup2" {
+			continue
+		}
+
+		// As in findCPUPath, cgroup v1 with a CPU controller takes
+		// precendence over cgroup v2.
+		if string(ftype) == "cgroup2" {
+			// v2 hierarchy.
+			n, err = unescapePath(out, mnt)
+			if err != nil {
+				// Don't keep searching on error. The kernel
+				// should never produce broken escaping.
+				return n, err
+			}
+			// Keep searching, we might find a v1 hierarchy with a
+			// CPU controller, which takes precedence.
+			continue
+		}
+
+		// (10) mount source:  filesystem specific information or "none"
+		i = bytealg.IndexByte(line, ' ')
+		if i < 0 {
+			return 0, errMalformedFile
+		}
+		// Don't care about mount source.
+		line = line[i+1:]
+
+		// (11) super options:  per super block options
+		superOpt := line
+
+		// v1 hierarchy
+		if containsCPU(superOpt) {
+			// Found a v1 CPU controller. This must be the
+			// only one, so we're done.
+			return unescapePath(out, mnt)
+		}
+	}
+
+	if n == 0 {
+		// Found nothing.
+		return 0, ErrNoCgroup
+	}
+
+	return n, nil
+}
+
+var errInvalidEscape error = stringError("invalid path escape sequence")
+
+// unescapePath copies in to out, unescaping escape sequences generated by
+// Linux's show_path.
+//
+// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
+// like '\040' for space.
+//
+// out must be at least as large as in.
+//
+// Returns the number of bytes written to out.
+//
+// Also see escapePath in cgroup_linux_test.go.
+func unescapePath(out []byte, in []byte) (int, error) {
+	// Not strictly necessary, but simplifies the implementation and will
+	// always hold in users.
+	if len(out) < len(in) {
+		throw("output too small")
+	}
+
+	var outi, ini int
+	for ini < len(in) {
+		c := in[ini]
+		if c != '\\' {
+			out[outi] = c
+			outi++
+			ini++
+			continue
+		}
+
+		// Start of escape sequence.
+
+		// Escape sequence is always 4 characters: one slash and three
+		// digits.
+		if ini+3 >= len(in) {
+			return outi, errInvalidEscape
+		}
+
+		var outc byte
+		for i := range 3 {
+			c := in[ini+1+i]
+			if c < '0' || c > '9' {
+				return outi, errInvalidEscape
+			}
+
+			outc *= 8
+			outc += c - '0'
+		}
+
+		out[outi] = outc
+		outi++
+
+		ini += 4
+	}
+
+	return outi, nil
+}
diff --git a/src/internal/runtime/cgroup/cgroup_linux_test.go b/src/internal/runtime/cgroup/cgroup_linux_test.go
new file mode 100644
index 0000000000..d47fe42067
--- /dev/null
+++ b/src/internal/runtime/cgroup/cgroup_linux_test.go
@@ -0,0 +1,476 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup_test
+
+import (
+	"fmt"
+	"internal/runtime/cgroup"
+	"io"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+const _PATH_MAX = 4096
+
+func TestParseV1Number(t *testing.T) {
+	tests := []struct {
+		name     string
+		contents string
+		want     int64
+		wantErr  bool
+	}{
+		{
+			name:     "disabled",
+			contents: "-1\n",
+			want:     -1,
+		},
+		{
+			name:     "500000",
+			contents: "500000\n",
+			want:     500000,
+		},
+		{
+			name:     "MaxInt64",
+			contents: "9223372036854775807\n",
+			want:     9223372036854775807,
+		},
+		{
+			name:     "missing-newline",
+			contents: "500000",
+			wantErr:  true,
+		},
+		{
+			name:     "not-a-number",
+			contents: "123max\n",
+			wantErr:  true,
+		},
+		{
+			name:     "v2",
+			contents: "1000 5000\n",
+			wantErr:  true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := cgroup.ParseV1Number([]byte(tc.contents))
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("parseV1Number got err nil want non-nil")
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("parseV1Number got err %v want nil", err)
+			}
+
+			if got != tc.want {
+				t.Errorf("parseV1Number got %d want %d", got, tc.want)
+			}
+		})
+	}
+}
+
+func TestParseV2Limit(t *testing.T) {
+	tests := []struct {
+		name     string
+		contents string
+		want     float64
+		wantOK   bool
+		wantErr  bool
+	}{
+		{
+			name:     "disabled",
+			contents: "max 100000\n",
+			wantOK:   false,
+		},
+		{
+			name:     "5",
+			contents: "500000 100000\n",
+			want:     5,
+			wantOK:   true,
+		},
+		{
+			name:     "0.5",
+			contents: "50000 100000\n",
+			want:     0.5,
+			wantOK:   true,
+		},
+		{
+			name:     "2.5",
+			contents: "250000 100000\n",
+			want:     2.5,
+			wantOK:   true,
+		},
+		{
+			name:     "MaxInt64",
+			contents: "9223372036854775807 9223372036854775807\n",
+			want:     1,
+			wantOK:   true,
+		},
+		{
+			name:     "missing-newline",
+			contents: "500000 100000",
+			wantErr:  true,
+		},
+		{
+			name:     "v1",
+			contents: "500000\n",
+			wantErr:  true,
+		},
+		{
+			name:     "quota-not-a-number",
+			contents: "500000us 100000\n",
+			wantErr:  true,
+		},
+		{
+			name:     "period-not-a-number",
+			contents: "500000 100000us\n",
+			wantErr:  true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, gotOK, err := cgroup.ParseV2Limit([]byte(tc.contents))
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("parseV1Limit got err nil want non-nil")
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("parseV2Limit got err %v want nil", err)
+			}
+
+			if gotOK != tc.wantOK {
+				t.Errorf("parseV2Limit got ok %v want %v", gotOK, tc.wantOK)
+			}
+
+			if tc.wantOK && got != tc.want {
+				t.Errorf("parseV2Limit got %f want %f", got, tc.want)
+			}
+		})
+	}
+}
+
+func TestParseCPURelativePath(t *testing.T) {
+	tests := []struct {
+		name     string
+		contents string
+		want     string
+		wantVer  cgroup.Version
+		wantErr  bool
+	}{
+		{
+			name:     "empty",
+			contents: "",
+			wantErr:  true,
+		},
+		{
+			name: "v1",
+			contents: `2:cpu,cpuacct:/a/b/cpu
+1:blkio:/a/b/blkio
+`,
+			want:    "/a/b/cpu",
+			wantVer: cgroup.V1,
+		},
+		{
+			name:     "v2",
+			contents: "0::/a/b/c\n",
+			want:     "/a/b/c",
+			wantVer:  cgroup.V2,
+		},
+		{
+			name: "mixed",
+			contents: `2:cpu,cpuacct:/a/b/cpu
+1:blkio:/a/b/blkio
+0::/a/b/v2
+`,
+			want:    "/a/b/cpu",
+			wantVer: cgroup.V1,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			r := strings.NewReader(tc.contents)
+			read := func(fd int, b []byte) (int, uintptr) {
+				n, err := r.Read(b)
+				if err != nil && err != io.EOF {
+					const dummyErrno = 42
+					return n, dummyErrno
+				}
+				return n, 0
+			}
+
+			var got [cgroup.PathSize]byte
+			var scratch [cgroup.ParseSize]byte
+			n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:])
+			if (err != nil) != tc.wantErr {
+				t.Fatalf("parseCPURelativePath got err %v want %v", err, tc.wantErr)
+			}
+
+			if gotVer != tc.wantVer {
+				t.Errorf("parseCPURelativePath got cgroup version %d want %d", gotVer, tc.wantVer)
+			}
+
+			if string(got[:n]) != tc.want {
+				t.Errorf("parseCPURelativePath got %q want %q", string(got[:n]), tc.want)
+			}
+		})
+	}
+}
+
+func TestContainsCPU(t *testing.T) {
+	tests := []struct {
+		in   string
+		want bool
+	}{
+		{
+			in:   "",
+			want: false,
+		},
+		{
+			in:   ",",
+			want: false,
+		},
+		{
+			in:   "cpu",
+			want: true,
+		},
+		{
+			in:   "memory,cpu",
+			want: true,
+		},
+		{
+			in:   "cpu,memory",
+			want: true,
+		},
+		{
+			in:   "memory,cpu,block",
+			want: true,
+		},
+		{
+			in:   "memory,cpuacct,block",
+			want: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.in, func(t *testing.T) {
+			got := cgroup.ContainsCPU([]byte(tc.in))
+			if got != tc.want {
+				t.Errorf("containsCPU(%q) got %v want %v", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestParseCPUMount(t *testing.T) {
+	// Used for v2-longline. We want an overlayfs mount to have an option
+	// so long that the entire line can't possibly fit in the scratch
+	// buffer.
+	const lowerPath = "/so/many/overlay/layers"
+	overlayLongLowerDir := lowerPath
+	for i := 0; len(overlayLongLowerDir) < cgroup.ScratchSize; i++ {
+		overlayLongLowerDir += fmt.Sprintf(":%s%d", lowerPath, i)
+	}
+
+	tests := []struct {
+		name     string
+		contents string
+		want     string
+		wantErr  bool
+	}{
+		{
+			name:     "empty",
+			contents: "",
+			wantErr:  true,
+		},
+		{
+			name: "v1",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+			want: "/sys/fs/cgroup/cpu",
+		},
+		{
+			name: "v2",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			want: "/sys/fs/cgroup",
+		},
+		{
+			name: "mixed",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
+54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
+56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
+58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
+59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
+`,
+			want: "/sys/fs/cgroup/cpu",
+		},
+		{
+			name: "v2-escaped",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			want: `/sys/fs/cgroup/tab	tab`,
+		},
+		{
+			// Overly long line on a different mount doesn't matter.
+			name: "v2-longline",
+			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
+20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
+21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
+262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work
+25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
+`,
+			want: "/sys/fs/cgroup",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			r := strings.NewReader(tc.contents)
+			read := func(fd int, b []byte) (int, uintptr) {
+				n, err := r.Read(b)
+				if err != nil && err != io.EOF {
+					const dummyErrno = 42
+					return n, dummyErrno
+				}
+				return n, 0
+			}
+
+			var got [cgroup.PathSize]byte
+			var scratch [cgroup.ParseSize]byte
+			n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:])
+			if (err != nil) != tc.wantErr {
+				t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr)
+			}
+
+			if string(got[:n]) != tc.want {
+				t.Errorf("parseCPUMount got %q want %q", string(got[:n]), tc.want)
+			}
+		})
+	}
+}
+
+// escapePath performs escaping equivalent to Linux's show_path.
+//
+// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
+// like '\040' for space.
+func escapePath(s string) string {
+	out := make([]rune, 0, len(s))
+	for _, c := range s {
+		switch c {
+		case '\\', ' ', '\t', '\n':
+			out = append(out, '\\')
+			cs := strconv.FormatInt(int64(c), 8)
+			if len(cs) <= 2 {
+				out = append(out, '0')
+			}
+			if len(cs) <= 1 {
+				out = append(out, '0')
+			}
+			for _, csc := range cs {
+				out = append(out, csc)
+			}
+		default:
+			out = append(out, c)
+		}
+	}
+	return string(out)
+}
+
+func TestEscapePath(t *testing.T) {
+	tests := []struct {
+		name      string
+		unescaped string
+		escaped   string
+	}{
+		{
+			name:      "boring",
+			unescaped: `/a/b/c`,
+			escaped:   `/a/b/c`,
+		},
+		{
+			name:      "space",
+			unescaped: `/a/b b/c`,
+			escaped:   `/a/b\040b/c`,
+		},
+		{
+			name:      "tab",
+			unescaped: `/a/b	b/c`,
+			escaped:   `/a/b\011b/c`,
+		},
+		{
+			name: "newline",
+			unescaped: `/a/b
+b/c`,
+			escaped: `/a/b\012b/c`,
+		},
+		{
+			name:      "slash",
+			unescaped: `/a/b\b/c`,
+			escaped:   `/a/b\134b/c`,
+		},
+		{
+			name:      "beginning",
+			unescaped: `\b/c`,
+			escaped:   `\134b/c`,
+		},
+		{
+			name:      "ending",
+			unescaped: `/a/\`,
+			escaped:   `/a/\134`,
+		},
+	}
+
+	t.Run("escapePath", func(t *testing.T) {
+		for _, tc := range tests {
+			t.Run(tc.name, func(t *testing.T) {
+				got := escapePath(tc.unescaped)
+				if got != tc.escaped {
+					t.Errorf("escapePath got %q want %q", got, tc.escaped)
+				}
+			})
+		}
+	})
+
+	t.Run("unescapePath", func(t *testing.T) {
+		for _, tc := range tests {
+			t.Run(tc.name, func(t *testing.T) {
+				in := []byte(tc.escaped)
+				out := make([]byte, len(in))
+				n, err := cgroup.UnescapePath(out, in)
+				if err != nil {
+					t.Errorf("unescapePath got err %v want nil", err)
+				}
+				got := string(out[:n])
+				if got != tc.unescaped {
+					t.Errorf("unescapePath got %q want %q", got, tc.escaped)
+				}
+			})
+		}
+	})
+}
diff --git a/src/internal/runtime/cgroup/export_linux_test.go b/src/internal/runtime/cgroup/export_linux_test.go
new file mode 100644
index 0000000000..653fcd1b2f
--- /dev/null
+++ b/src/internal/runtime/cgroup/export_linux_test.go
@@ -0,0 +1,15 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup
+
+var ContainsCPU = containsCPU
+
+var ParseV1Number = parseV1Number
+var ParseV2Limit = parseV2Limit
+
+var ParseCPURelativePath = parseCPURelativePath
+var ParseCPUMount = parseCPUMount
+
+var UnescapePath = unescapePath
diff --git a/src/internal/runtime/cgroup/runtime.go b/src/internal/runtime/cgroup/runtime.go
new file mode 100644
index 0000000000..39c9295b07
--- /dev/null
+++ b/src/internal/runtime/cgroup/runtime.go
@@ -0,0 +1,14 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgroup
+
+import (
+	_ "unsafe" // for linkname
+)
+
+// Functions below pushed from runtime.
+
+//go:linkname throw
+func throw(s string)
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index b8f23cc3c2..95305b84bc 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -1056,6 +1056,11 @@ func internal_sync_fatal(s string) {
 	fatal(s)
 }
 
+//go:linkname cgroup_throw internal/runtime/cgroup.throw
+func cgroup_throw(s string) {
+	throw(s)
+}
+
 // throw triggers a fatal error that dumps a stack trace and exits.
 //
 // throw should be used for runtime-internal fatal errors where Go itself,
-- 
2.52.0