From: Michael Knyszek Date: Wed, 19 Oct 2022 18:51:15 +0000 (-0400) Subject: runtime: add page tracer X-Git-Tag: go1.20rc1~170 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e4435cb8448514d2413f9d9aa3ee40738d26fd67;p=gostls13.git runtime: add page tracer This change adds a new GODEBUG flag called pagetrace that writes a low-overhead trace of how pages of memory are managed by the Go runtime. The page tracer is kept behind a GOEXPERIMENT flag due to a potential security risk for setuid binaries. Change-Id: I6f4a2447d02693c25214400846a5d2832ad6e5c0 Reviewed-on: https://go-review.googlesource.com/c/go/+/444157 Reviewed-by: Austin Clements Reviewed-by: David Chase Run-TryBot: Michael Knyszek TryBot-Result: Gopher Robot --- diff --git a/src/internal/goexperiment/exp_pagetrace_off.go b/src/internal/goexperiment/exp_pagetrace_off.go new file mode 100644 index 0000000000..789e88332d --- /dev/null +++ b/src/internal/goexperiment/exp_pagetrace_off.go @@ -0,0 +1,9 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.pagetrace +// +build !goexperiment.pagetrace + +package goexperiment + +const PageTrace = false +const PageTraceInt = 0 diff --git a/src/internal/goexperiment/exp_pagetrace_on.go b/src/internal/goexperiment/exp_pagetrace_on.go new file mode 100644 index 0000000000..ea72b54863 --- /dev/null +++ b/src/internal/goexperiment/exp_pagetrace_on.go @@ -0,0 +1,9 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.pagetrace +// +build goexperiment.pagetrace + +package goexperiment + +const PageTrace = true +const PageTraceInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 16793f37ac..02e744362c 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -94,4 +94,10 @@ type Flags struct { // Arenas causes the "arena" standard library package to be visible // to the outside world. 
Arenas bool + + // PageTrace enables GODEBUG=pagetrace=/path/to/result. This feature + // is a GOEXPERIMENT due to a security risk with setuid binaries: + // this compels the Go runtime to write to some arbitrary file, which + // may be exploited. + PageTrace bool } diff --git a/src/runtime/create_file_nounix.go b/src/runtime/create_file_nounix.go new file mode 100644 index 0000000000..60f75175a2 --- /dev/null +++ b/src/runtime/create_file_nounix.go @@ -0,0 +1,14 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !unix + +package runtime + +const canCreateFile = false + +func create(name *byte, perm int32) int32 { + throw("unimplemented") + return -1 +} diff --git a/src/runtime/create_file_unix.go b/src/runtime/create_file_unix.go new file mode 100644 index 0000000000..7280810ed2 --- /dev/null +++ b/src/runtime/create_file_unix.go @@ -0,0 +1,14 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix + +package runtime + +const canCreateFile = true + +// create returns an fd to a write-only file. 
+func create(name *byte, perm int32) int32 { + return open(name, _O_CREAT|_O_WRONLY|_O_TRUNC, perm) +} diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go index 33ce63cd37..f7fe45b4ab 100644 --- a/src/runtime/defs1_netbsd_386.go +++ b/src/runtime/defs1_netbsd_386.go @@ -8,7 +8,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x400000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go index b87833be65..80908cd931 100644 --- a/src/runtime/defs1_netbsd_amd64.go +++ b/src/runtime/defs1_netbsd_amd64.go @@ -8,7 +8,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x400000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go index 9cb680e360..c63e592ff1 100644 --- a/src/runtime/defs1_netbsd_arm.go +++ b/src/runtime/defs1_netbsd_arm.go @@ -8,7 +8,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x400000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs1_netbsd_arm64.go b/src/runtime/defs1_netbsd_arm64.go index 1c05ce6ad8..804b5b0b3f 100644 --- a/src/runtime/defs1_netbsd_arm64.go +++ b/src/runtime/defs1_netbsd_arm64.go @@ -8,7 +8,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x400000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs1_solaris_amd64.go b/src/runtime/defs1_solaris_amd64.go index 003cd1c42c..bb53c22e06 100644 --- a/src/runtime/defs1_solaris_amd64.go +++ b/src/runtime/defs1_solaris_amd64.go @@ -91,7 +91,10 @@ const ( _MAXHOSTNAMELEN = 0x100 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x80 + _O_TRUNC = 0x200 + _O_CREAT = 0x100 _O_CLOEXEC = 0x800000 _FD_CLOEXEC = 0x1 _F_GETFL = 0x3 diff --git a/src/runtime/defs_aix.go b/src/runtime/defs_aix.go 
index b794cd5de8..389598966a 100644 --- a/src/runtime/defs_aix.go +++ b/src/runtime/defs_aix.go @@ -124,7 +124,10 @@ const ( _ITIMER_PROF = C.ITIMER_PROF _O_RDONLY = C.O_RDONLY + _O_WRONLY = C.O_WRONLY _O_NONBLOCK = C.O_NONBLOCK + _O_CREAT = C.O_CREAT + _O_TRUNC = C.O_TRUNC _SS_DISABLE = C.SS_DISABLE _SI_USER = C.SI_USER diff --git a/src/runtime/defs_aix_ppc64.go b/src/runtime/defs_aix_ppc64.go index 4e20c85841..2d25b7ce3c 100644 --- a/src/runtime/defs_aix_ppc64.go +++ b/src/runtime/defs_aix_ppc64.go @@ -81,7 +81,10 @@ const ( _ITIMER_PROF = 0x2 _O_RDONLY = 0x0 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x100 + _O_TRUNC = 0x200 _SS_DISABLE = 0x2 _SI_USER = 0x0 diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go index 59b81cf713..89e4253f33 100644 --- a/src/runtime/defs_darwin.go +++ b/src/runtime/defs_darwin.go @@ -120,7 +120,10 @@ const ( F_SETFL = C.F_SETFL FD_CLOEXEC = C.FD_CLOEXEC + O_WRONLY = C.O_WRONLY O_NONBLOCK = C.O_NONBLOCK + O_CREAT = C.O_CREAT + O_TRUNC = C.O_TRUNC ) type StackT C.struct_sigaltstack diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go index cbc26bfcff..84e6f37203 100644 --- a/src/runtime/defs_darwin_amd64.go +++ b/src/runtime/defs_darwin_amd64.go @@ -99,7 +99,10 @@ const ( _F_SETFL = 0x4 _FD_CLOEXEC = 0x1 - _O_NONBLOCK = 4 + _O_WRONLY = 0x1 + _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 ) type stackt struct { diff --git a/src/runtime/defs_darwin_arm64.go b/src/runtime/defs_darwin_arm64.go index 9076e8bd54..30d7443f2e 100644 --- a/src/runtime/defs_darwin_arm64.go +++ b/src/runtime/defs_darwin_arm64.go @@ -101,7 +101,10 @@ const ( _F_SETFL = 0x4 _FD_CLOEXEC = 0x1 - _O_NONBLOCK = 4 + _O_WRONLY = 0x1 + _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 ) type stackt struct { diff --git a/src/runtime/defs_dragonfly.go b/src/runtime/defs_dragonfly.go index f67835692c..9dcfdf0fd7 100644 --- a/src/runtime/defs_dragonfly.go +++ b/src/runtime/defs_dragonfly.go @@ -32,7 +32,10 @@ 
const ( EBUSY = C.EBUSY EAGAIN = C.EAGAIN + O_WRONLY = C.O_WRONLY O_NONBLOCK = C.O_NONBLOCK + O_CREAT = C.O_CREAT + O_TRUNC = C.O_TRUNC O_CLOEXEC = C.O_CLOEXEC PROT_NONE = C.PROT_NONE diff --git a/src/runtime/defs_dragonfly_amd64.go b/src/runtime/defs_dragonfly_amd64.go index 7ab5fecb90..f1a2302f28 100644 --- a/src/runtime/defs_dragonfly_amd64.go +++ b/src/runtime/defs_dragonfly_amd64.go @@ -11,7 +11,10 @@ const ( _EBUSY = 0x10 _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x20000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go index 87229fabe8..d86ae9133a 100644 --- a/src/runtime/defs_freebsd.go +++ b/src/runtime/defs_freebsd.go @@ -51,7 +51,10 @@ const ( EAGAIN = C.EAGAIN ETIMEDOUT = C.ETIMEDOUT + O_WRONLY = C.O_WRONLY O_NONBLOCK = C.O_NONBLOCK + O_CREAT = C.O_CREAT + O_TRUNC = C.O_TRUNC O_CLOEXEC = C.O_CLOEXEC PROT_NONE = C.PROT_NONE diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go index 021caed255..ee8274188a 100644 --- a/src/runtime/defs_freebsd_386.go +++ b/src/runtime/defs_freebsd_386.go @@ -19,7 +19,10 @@ const ( _EAGAIN = 0x23 _ETIMEDOUT = 0x3c + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x100000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go index d747769696..9003f92015 100644 --- a/src/runtime/defs_freebsd_amd64.go +++ b/src/runtime/defs_freebsd_amd64.go @@ -19,7 +19,10 @@ const ( _EAGAIN = 0x23 _ETIMEDOUT = 0x3c + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x100000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go index 05cfc818dd..68cc1b9545 100644 --- a/src/runtime/defs_freebsd_arm.go +++ b/src/runtime/defs_freebsd_arm.go @@ -19,7 +19,10 @@ const ( _EAGAIN = 0x23 _ETIMEDOUT = 0x3c + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 
0x400 _O_CLOEXEC = 0x100000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_freebsd_arm64.go b/src/runtime/defs_freebsd_arm64.go index 4b2792cebc..1d6723621a 100644 --- a/src/runtime/defs_freebsd_arm64.go +++ b/src/runtime/defs_freebsd_arm64.go @@ -19,7 +19,10 @@ const ( _EAGAIN = 0x23 _ETIMEDOUT = 0x3c + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x100000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_freebsd_riscv64.go b/src/runtime/defs_freebsd_riscv64.go index 8266ca0824..b977bde551 100644 --- a/src/runtime/defs_freebsd_riscv64.go +++ b/src/runtime/defs_freebsd_riscv64.go @@ -18,7 +18,10 @@ const ( _EAGAIN = 0x23 _ETIMEDOUT = 0x3c + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x100000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go index 7027286f5c..72339f4aa5 100644 --- a/src/runtime/defs_linux_386.go +++ b/src/runtime/defs_linux_386.go @@ -90,6 +90,9 @@ const ( _SIGEV_THREAD_ID = 0x4 _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go index 4ce6146177..298f3ebf7c 100644 --- a/src/runtime/defs_linux_amd64.go +++ b/src/runtime/defs_linux_amd64.go @@ -165,6 +165,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 ) diff --git a/src/runtime/defs_linux_arm.go b/src/runtime/defs_linux_arm.go index 999c11431b..6fee57dacf 100644 --- a/src/runtime/defs_linux_arm.go +++ b/src/runtime/defs_linux_arm.go @@ -80,6 +80,9 @@ const ( _ITIMER_PROF = 0x2 _ITIMER_VIRTUAL = 0x1 _O_RDONLY = 0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 diff --git a/src/runtime/defs_linux_arm64.go b/src/runtime/defs_linux_arm64.go index 73f26a9b00..0216096301 100644 --- 
a/src/runtime/defs_linux_arm64.go +++ b/src/runtime/defs_linux_arm64.go @@ -165,6 +165,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 ) diff --git a/src/runtime/defs_linux_loong64.go b/src/runtime/defs_linux_loong64.go index 956121d0fc..6eca18bdae 100644 --- a/src/runtime/defs_linux_loong64.go +++ b/src/runtime/defs_linux_loong64.go @@ -137,6 +137,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 ) diff --git a/src/runtime/defs_linux_mips64x.go b/src/runtime/defs_linux_mips64x.go index 30157fa808..2e8c4056ba 100644 --- a/src/runtime/defs_linux_mips64x.go +++ b/src/runtime/defs_linux_mips64x.go @@ -169,6 +169,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x100 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x80 _O_CLOEXEC = 0x80000 _SA_RESTORER = 0 diff --git a/src/runtime/defs_linux_mipsx.go b/src/runtime/defs_linux_mipsx.go index 5d779778c9..7593600cc6 100644 --- a/src/runtime/defs_linux_mipsx.go +++ b/src/runtime/defs_linux_mipsx.go @@ -163,7 +163,10 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x80 + _O_CREAT = 0x100 + _O_TRUNC = 0x200 _O_CLOEXEC = 0x80000 _SA_RESTORER = 0 ) diff --git a/src/runtime/defs_linux_ppc64.go b/src/runtime/defs_linux_ppc64.go index 36d021a727..bb3ac016e5 100644 --- a/src/runtime/defs_linux_ppc64.go +++ b/src/runtime/defs_linux_ppc64.go @@ -166,6 +166,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 _SA_RESTORER = 0 diff --git a/src/runtime/defs_linux_ppc64le.go b/src/runtime/defs_linux_ppc64le.go index 36d021a727..bb3ac016e5 100644 --- a/src/runtime/defs_linux_ppc64le.go +++ b/src/runtime/defs_linux_ppc64le.go @@ -166,6 +166,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + 
_O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 _SA_RESTORER = 0 diff --git a/src/runtime/defs_linux_riscv64.go b/src/runtime/defs_linux_riscv64.go index 116a842e58..ce4a7f36cd 100644 --- a/src/runtime/defs_linux_riscv64.go +++ b/src/runtime/defs_linux_riscv64.go @@ -162,6 +162,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 ) diff --git a/src/runtime/defs_linux_s390x.go b/src/runtime/defs_linux_s390x.go index 79a54fe7e7..36497dd40d 100644 --- a/src/runtime/defs_linux_s390x.go +++ b/src/runtime/defs_linux_s390x.go @@ -159,6 +159,9 @@ type sigevent struct { const ( _O_RDONLY = 0x0 + _O_WRONLY = 0x1 + _O_CREAT = 0x40 + _O_TRUNC = 0x200 _O_NONBLOCK = 0x800 _O_CLOEXEC = 0x80000 _SA_RESTORER = 0 diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go index 865b946000..43923e3075 100644 --- a/src/runtime/defs_netbsd.go +++ b/src/runtime/defs_netbsd.go @@ -34,7 +34,10 @@ const ( EFAULT = C.EFAULT EAGAIN = C.EAGAIN + O_WRONLY = C.O_WRONLY O_NONBLOCK = C.O_NONBLOCK + O_CREAT = C.O_CREAT + O_TRUNC = C.O_TRUNC O_CLOEXEC = C.O_CLOEXEC PROT_NONE = C.PROT_NONE diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go index 72a66ae084..25524c5982 100644 --- a/src/runtime/defs_openbsd_386.go +++ b/src/runtime/defs_openbsd_386.go @@ -10,7 +10,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x10000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go index b1e8dca359..a31d03bce5 100644 --- a/src/runtime/defs_openbsd_amd64.go +++ b/src/runtime/defs_openbsd_amd64.go @@ -10,7 +10,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x10000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_openbsd_arm.go 
b/src/runtime/defs_openbsd_arm.go index ba6b2c17bf..1d1767bd3b 100644 --- a/src/runtime/defs_openbsd_arm.go +++ b/src/runtime/defs_openbsd_arm.go @@ -10,7 +10,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x10000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_openbsd_arm64.go b/src/runtime/defs_openbsd_arm64.go index 13d2e95a86..745d0d3923 100644 --- a/src/runtime/defs_openbsd_arm64.go +++ b/src/runtime/defs_openbsd_arm64.go @@ -11,7 +11,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x10000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_openbsd_mips64.go b/src/runtime/defs_openbsd_mips64.go index c0bc766d8f..1e469e4fec 100644 --- a/src/runtime/defs_openbsd_mips64.go +++ b/src/runtime/defs_openbsd_mips64.go @@ -17,7 +17,10 @@ const ( _EFAULT = 0xe _EAGAIN = 0x23 + _O_WRONLY = 0x1 _O_NONBLOCK = 0x4 + _O_CREAT = 0x200 + _O_TRUNC = 0x400 _O_CLOEXEC = 0x10000 _PROT_NONE = 0x0 diff --git a/src/runtime/defs_solaris.go b/src/runtime/defs_solaris.go index bb89eec1bf..406304de5a 100644 --- a/src/runtime/defs_solaris.go +++ b/src/runtime/defs_solaris.go @@ -120,7 +120,10 @@ const ( MAXHOSTNAMELEN = C.MAXHOSTNAMELEN + O_WRONLY = C.O_WRONLY O_NONBLOCK = C.O_NONBLOCK + O_CREAT = C.O_CREAT + O_TRUNC = C.O_TRUNC O_CLOEXEC = C.O_CLOEXEC FD_CLOEXEC = C.FD_CLOEXEC F_GETFL = C.F_GETFL diff --git a/src/runtime/exithook.go b/src/runtime/exithook.go index a94d4ede40..bb29a94724 100644 --- a/src/runtime/exithook.go +++ b/src/runtime/exithook.go @@ -54,6 +54,7 @@ func runExitHooks(exitCode int) { return } + finishPageTrace() for i := range exitHooks.hooks { h := exitHooks.hooks[len(exitHooks.hooks)-i-1] if exitCode != 0 && !h.runOnNonZeroExit { diff --git a/src/runtime/export_unix_test.go b/src/runtime/export_unix_test.go index 0119538787..71a55d8941 100644 --- a/src/runtime/export_unix_test.go +++ 
b/src/runtime/export_unix_test.go @@ -89,3 +89,9 @@ func waitForSigusr1Callback(gp *g) bool { func SendSigusr1(mp *M) { signalM(mp, _SIGUSR1) } + +const ( + O_WRONLY = _O_WRONLY + O_CREAT = _O_CREAT + O_TRUNC = _O_TRUNC +) diff --git a/src/runtime/extern.go b/src/runtime/extern.go index 3a48588aec..6c41c62694 100644 --- a/src/runtime/extern.go +++ b/src/runtime/extern.go @@ -127,6 +127,13 @@ It is a comma-separated list of name=val pairs setting these named variables: When set to 0 memory profiling is disabled. Refer to the description of MemProfileRate for the default value. + pagetrace: setting pagetrace=/path/to/file will write out a trace of page events + that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool. + Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not + enable this functionality if your program is a setuid binary as it introduces a security + risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this + option for some applications can produce large traces, so use with care. + invalidptr: invalidptr=1 (the default) causes the garbage collector and stack copier to crash the program if an invalid pointer value (for example, 1) is found in a pointer-typed location. Setting invalidptr=0 disables this check. diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index b4d8e71db6..e59340ec7c 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -747,6 +747,8 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt unlock(p.mheapLock) if !p.test { + pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages)) + // Only perform the actual scavenging if we're not in a test. // It's dangerous to do so otherwise. 
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index d6d90d4da3..1401e92f4e 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1306,6 +1306,7 @@ HaveSpan: // There are a few very limited cirumstances where we won't have a P here. // It's OK to simply skip scavenging in these cases. Something else will notice // and pick up the tab. + var now int64 if pp != nil && bytesToScavenge > 0 { // Measure how long we spent scavenging and add that measurement to the assist // time so we can track it for the GC CPU limiter. @@ -1321,7 +1322,7 @@ HaveSpan: }) // Finish up accounting. - now := nanotime() + now = nanotime() if track { pp.limiterEvent.stop(limiterEventScavengeAssist, now) } @@ -1360,6 +1361,7 @@ HaveSpan: } memstats.heapStats.release() + pageTraceAlloc(pp, now, base, npages) return s } @@ -1535,6 +1537,8 @@ func (h *mheap) grow(npage uintptr) (uintptr, bool) { // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan) { systemstack(func() { + pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages) + lock(&h.lock) if msanenabled { // Tell msan that this entire span is no longer in use. @@ -1565,6 +1569,8 @@ func (h *mheap) freeSpan(s *mspan) { // //go:systemstack func (h *mheap) freeManual(s *mspan, typ spanAllocType) { + pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages) + s.needzero = 1 lock(&h.lock) h.freeSpanLocked(s, typ) diff --git a/src/runtime/pagetrace_off.go b/src/runtime/pagetrace_off.go new file mode 100644 index 0000000000..10b44d40ce --- /dev/null +++ b/src/runtime/pagetrace_off.go @@ -0,0 +1,28 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !goexperiment.pagetrace + +package runtime + +//go:systemstack +func pageTraceAlloc(pp *p, now int64, base, npages uintptr) { +} + +//go:systemstack +func pageTraceFree(pp *p, now int64, base, npages uintptr) { +} + +//go:systemstack +func pageTraceScav(pp *p, now int64, base, npages uintptr) { +} + +type pageTraceBuf struct { +} + +func initPageTrace(env string) { +} + +func finishPageTrace() { +} diff --git a/src/runtime/pagetrace_on.go b/src/runtime/pagetrace_on.go new file mode 100644 index 0000000000..0e621cb6ca --- /dev/null +++ b/src/runtime/pagetrace_on.go @@ -0,0 +1,358 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.pagetrace + +// Page tracer. +// +// This file contains an implementation of page trace instrumentation for tracking +// the way the Go runtime manages pages of memory. The trace may be enabled at program +// startup with the GODEBUG option pagetrace. +// +// Each page trace event is either 8 or 16 bytes wide. The first +// 8 bytes follow this format for non-sync events: +// +// [16 timestamp delta][35 base address][10 npages][1 isLarge][2 pageTraceEventType] +// +// If the "large" bit is set then the event is 16 bytes wide with the second 8 byte word +// containing the full npages value (the npages bitfield is 0). +// +// The base address's bottom pageShift bits are always zero hence why we can pack other +// data in there. We ignore the top 16 bits, assuming a 48 bit address space for the +// heap. +// +// The timestamp delta is computed from the difference between the current nanotime +// timestamp and the last sync event's timestamp. The bottom pageTraceTimeLostBits of +// this delta is removed and only the next pageTraceTimeDeltaBits are kept. +// +// A sync event is emitted at the beginning of each trace buffer and whenever the +// timestamp delta would not fit in an event. 
+// +// Sync events have the following structure: +// +// [61 timestamp or P ID][1 isPID][2 pageTraceSyncEvent] +// +// In essence, the "large" bit is repurposed to indicate whether it's a timestamp or a P ID +// (these are typically uint32). Note that we only have 61 bits for the 64-bit timestamp, +// but like for the delta we drop the bottom pageTraceTimeLostBits here as well. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// pageTraceAlloc records a page trace allocation event. +// pp may be nil. Does nothing unless pageTrace.enabled is set. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func pageTraceAlloc(pp *p, now int64, base, npages uintptr) { + if pageTrace.enabled { + if now == 0 { + now = nanotime() + } + pageTraceEmit(pp, now, base, npages, pageTraceAllocEvent) + } +} + +// pageTraceFree records a page trace free event. +// pp may be nil. Does nothing unless pageTrace.enabled is set. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func pageTraceFree(pp *p, now int64, base, npages uintptr) { + if pageTrace.enabled { + if now == 0 { + now = nanotime() + } + pageTraceEmit(pp, now, base, npages, pageTraceFreeEvent) + } +} + +// pageTraceScav records a page trace scavenge event. +// pp may be nil. Does nothing unless pageTrace.enabled is set. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func pageTraceScav(pp *p, now int64, base, npages uintptr) { + if pageTrace.enabled { + if now == 0 { + now = nanotime() + } + pageTraceEmit(pp, now, base, npages, pageTraceScavEvent) + } +} + +// pageTraceEventType is a page trace event type. +type pageTraceEventType uint8 + +const ( + pageTraceSyncEvent pageTraceEventType = iota // Timestamp emission. + pageTraceAllocEvent // Allocation of pages. + pageTraceFreeEvent // Freeing pages. + pageTraceScavEvent // Scavenging pages. +) + +// pageTraceEmit emits a page trace event. 
+// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func pageTraceEmit(pp *p, now int64, base, npages uintptr, typ pageTraceEventType) { + // Get a buffer. + var tbp *pageTraceBuf + pid := int32(-1) + if pp == nil { + // We have no P, so take the global buffer. + lock(&pageTrace.lock) + tbp = &pageTrace.buf + } else { + tbp = &pp.pageTraceBuf + pid = pp.id + } + + // Initialize the buffer if necessary. + tb := *tbp + if tb.buf == nil { + tb.buf = (*pageTraceEvents)(sysAlloc(pageTraceBufSize, &memstats.other_sys)) + tb = tb.writePid(pid) + } + + // Handle timestamp and emit a sync event if necessary. + if now < tb.timeBase { + now = tb.timeBase + } + if now-tb.timeBase >= pageTraceTimeMaxDelta { + tb.timeBase = now + tb = tb.writeSync(pid) + } + + // Emit the event. + tb = tb.writeEvent(pid, now, base, npages, typ) + + // Write back the buffer. + *tbp = tb + if pp == nil { + unlock(&pageTrace.lock) + } +} + +const ( + pageTraceBufSize = 32 << 10 + + // These constants describe the per-event timestamp delta encoding. + pageTraceTimeLostBits = 7 // How many bits of precision we lose in the delta. + pageTraceTimeDeltaBits = 16 // Size of the delta in bits. + pageTraceTimeMaxDelta = 1 << (pageTraceTimeLostBits + pageTraceTimeDeltaBits) +) + +// pageTraceEvents is the low-level buffer containing the trace data. +type pageTraceEvents struct { + _ sys.NotInHeap + events [pageTraceBufSize / 8]uint64 +} + +// pageTraceBuf is a wrapper around pageTraceEvents that knows how to write events +// to the buffer. It tracks state necessary to do so. +type pageTraceBuf struct { + buf *pageTraceEvents + len int // How many events have been written so far. + timeBase int64 // The current timestamp base from which deltas are produced. + finished bool // Whether this trace buf should no longer flush anything out. +} + +// writePid writes a P ID event indicating which P we're running on. 
+// +// Assumes there's always space in the buffer since this is only called at the +// beginning of a new buffer. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func (tb pageTraceBuf) writePid(pid int32) pageTraceBuf { + e := uint64(int64(pid))<<3 | 0b100 | uint64(pageTraceSyncEvent) + tb.buf.events[tb.len] = e + tb.len++ + return tb +} + +// writeSync writes a sync event, which is just a timestamp. Handles flushing. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func (tb pageTraceBuf) writeSync(pid int32) pageTraceBuf { + if tb.len+1 > len(tb.buf.events) { + // N.B. flush will writeSync again. + return tb.flush(pid, tb.timeBase) + } + e := ((uint64(tb.timeBase) >> pageTraceTimeLostBits) << 3) | uint64(pageTraceSyncEvent) + tb.buf.events[tb.len] = e + tb.len++ + return tb +} + +// writeEvent handles writing all non-sync and non-pid events. Handles flushing if necessary. +// +// pid indicates the P we're currently running on. Necessary in case we need to flush. +// now is the current nanotime timestamp. +// base is the base address of whatever group of pages this event is happening to. +// npages is the length of the group of pages this event is happening to. +// typ is the event that's happening to these pages. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func (tb pageTraceBuf) writeEvent(pid int32, now int64, base, npages uintptr, typ pageTraceEventType) pageTraceBuf { + large := 0 + np := npages + if npages >= 1024 { + large = 1 + np = 0 + } + if tb.len+1+large > len(tb.buf.events) { + tb = tb.flush(pid, now) + } + if base%pageSize != 0 { + throw("base address not page aligned") + } + e := uint64(base) + // The pageShift low-order bits are zero. + e |= uint64(typ) // 2 bits + e |= uint64(large) << 2 // 1 bit + e |= uint64(np) << 3 // 10 bits + // Write the timestamp delta in the upper pageTraceTimeDeltaBits. 
+ e |= uint64((now-tb.timeBase)>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits) + tb.buf.events[tb.len] = e + if large != 0 { + // npages doesn't fit in 10 bits, so write an additional word with that data. + tb.buf.events[tb.len+1] = uint64(npages) + } + tb.len += 1 + large + return tb +} + +// flush writes out the contents of the buffer to pageTrace.fd and resets the buffer. +// It then writes out a P ID event and the first sync event for the new buffer. +// +// Must run on the system stack as a crude way to prevent preemption. +// +//go:systemstack +func (tb pageTraceBuf) flush(pid int32, now int64) pageTraceBuf { + if !tb.finished { + lock(&pageTrace.fdLock) + writeFull(uintptr(pageTrace.fd), (*byte)(unsafe.Pointer(&tb.buf.events[0])), tb.len*8) + unlock(&pageTrace.fdLock) + } + tb.len = 0 + tb.timeBase = now + return tb.writePid(pid).writeSync(pid) +} + +var pageTrace struct { + // enabled indicates whether tracing is enabled. If true, fd >= 0. + // + // Safe to read without synchronization because it's only set once + // at program initialization. + enabled bool + + // buf is the page trace buffer used if there is no P. + // + // lock protects buf. + lock mutex + buf pageTraceBuf + + // fdLock protects writing to fd. + // + // fd is the file to write the page trace to. + fdLock mutex + fd int32 +} + +// initPageTrace initializes the page tracing infrastructure from GODEBUG. +// +// env must be the value of the GODEBUG environment variable. 
+func initPageTrace(env string) { + var value string + for env != "" { + elt, rest := env, "" + for i := 0; i < len(env); i++ { + if env[i] == ',' { + elt, rest = env[:i], env[i+1:] + break + } + } + env = rest + if hasPrefix(elt, "pagetrace=") { + value = elt[len("pagetrace="):] + break + } + } + pageTrace.fd = -1 + if canCreateFile && value != "" { + var tmp [4096]byte + if len(value) != 0 && len(value) < 4096 { + copy(tmp[:], value) + pageTrace.fd = create(&tmp[0], 0o664) + } + } + pageTrace.enabled = pageTrace.fd >= 0 +} + +// finishPageTrace flushes all P's trace buffers and disables page tracing. +func finishPageTrace() { + if !pageTrace.enabled { + return + } + // Grab worldsema as we're about to execute a ragged barrier. + semacquire(&worldsema) + systemstack(func() { + // Disable tracing. This isn't strictly necessary and it's best-effort. + pageTrace.enabled = false + + // Execute a ragged barrier, flushing each trace buffer. + forEachP(func(pp *p) { + if pp.pageTraceBuf.buf != nil { + pp.pageTraceBuf = pp.pageTraceBuf.flush(pp.id, nanotime()) + } + pp.pageTraceBuf.finished = true + }) + + // Write the global have-no-P buffer. + lock(&pageTrace.lock) + if pageTrace.buf.buf != nil { + pageTrace.buf = pageTrace.buf.flush(-1, nanotime()) + } + pageTrace.buf.finished = true + unlock(&pageTrace.lock) + + // Safely close the file as nothing else should be allowed to write to the fd. + lock(&pageTrace.fdLock) + closefd(pageTrace.fd) + pageTrace.fd = -1 + unlock(&pageTrace.fdLock) + }) + semrelease(&worldsema) +} + +// writeFull ensures that a complete write of bn bytes from b is made to fd. 
+func writeFull(fd uintptr, b *byte, bn int) { + for bn > 0 { + n := write(fd, unsafe.Pointer(b), int32(bn)) + if n == -_EINTR || n == -_EAGAIN { + continue + } + if n < 0 { + print("errno=", -n, "\n") + throw("writeBytes: bad write") + } + bn -= int(n) + b = addb(b, uintptr(n)) + } +} diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 201ed537ba..176399c7eb 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -612,16 +612,39 @@ const ( _GoidCacheBatch = 16 ) -// cpuinit extracts the environment variable GODEBUG from the environment on -// Unix-like operating systems and calls internal/cpu.Initialize. -func cpuinit() { - const prefix = "GODEBUG=" - var env string - +// cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete +// value of the GODEBUG environment variable. +func cpuinit(env string) { switch GOOS { case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": cpu.DebugOptions = true + } + cpu.Initialize(env) + + // Support cpu feature variables are used in code generated by the compiler + // to guard execution of instructions that can not be assumed to be always supported. + switch GOARCH { + case "386", "amd64": + x86HasPOPCNT = cpu.X86.HasPOPCNT + x86HasSSE41 = cpu.X86.HasSSE41 + x86HasFMA = cpu.X86.HasFMA + + case "arm": + armHasVFPv4 = cpu.ARM.HasVFPv4 + + case "arm64": + arm64HasATOMICS = cpu.ARM64.HasATOMICS + } +} +// getGodebugEarly extracts the environment variable GODEBUG from the environment on +// Unix-like operating systems and returns it. This function exists to extract GODEBUG +// early before much of the runtime is initialized. +func getGodebugEarly() string { + const prefix = "GODEBUG=" + var env string + switch GOOS { + case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": // Similar to goenv_unix but extracts the environment value for // GODEBUG directly. 
// TODO(moehrmann): remove when general goenvs() can be called before cpuinit() @@ -640,23 +663,7 @@ func cpuinit() { } } } - - cpu.Initialize(env) - - // Support cpu feature variables are used in code generated by the compiler - // to guard execution of instructions that can not be assumed to be always supported. - switch GOARCH { - case "386", "amd64": - x86HasPOPCNT = cpu.X86.HasPOPCNT - x86HasSSE41 = cpu.X86.HasSSE41 - x86HasFMA = cpu.X86.HasFMA - - case "arm": - armHasVFPv4 = cpu.ARM.HasVFPv4 - - case "arm64": - arm64HasATOMICS = cpu.ARM64.HasATOMICS - } + return env } // The bootstrap sequence is: @@ -703,9 +710,11 @@ func schedinit() { moduledataverify() stackinit() mallocinit() - cpuinit() // must run before alginit - alginit() // maps, hash, fastrand must not be used before this call - fastrandinit() // must run before mcommoninit + godebug := getGodebugEarly() + initPageTrace(godebug) // must run after mallocinit but before anything allocates + cpuinit(godebug) // must run before alginit + alginit() // maps, hash, fastrand must not be used before this call + fastrandinit() // must run before mcommoninit mcommoninit(gp.m, -1) modulesinit() // provides activeModules typelinksinit() // uses maps, activeModules diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ceb6ff66e2..9381d1e3f7 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -754,6 +754,11 @@ type p struct { // scheduler ASAP (regardless of what G is running on it). preempt bool + // pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces. + // + // Used only if GOEXPERIMENT=pagetrace. + pageTraceBuf pageTraceBuf + // Padding is no longer needed. False sharing is now not a worry because p is large enough // that its size class is an integer multiple of the cache line size (for any of our architectures). 
}
diff --git a/src/runtime/syscall_unix_test.go b/src/runtime/syscall_unix_test.go
new file mode 100644
index 0000000000..2a69c40326
--- /dev/null
+++ b/src/runtime/syscall_unix_test.go
@@ -0,0 +1,32 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package runtime_test
+
+import (
+	"runtime"
+	"syscall"
+	"testing"
+)
+
+// TestSyscallFlagAlignment checks that the runtime package's O_WRONLY,
+// O_CREAT and O_TRUNC values equal the syscall package's constants of the
+// same names, reporting every mismatch.
+func TestSyscallFlagAlignment(t *testing.T) {
+	// TODO(mknyszek): Check other flags.
+	flags := []struct {
+		name      string
+		got, want int
+	}{
+		{"O_WRONLY", runtime.O_WRONLY, syscall.O_WRONLY},
+		{"O_CREAT", runtime.O_CREAT, syscall.O_CREAT},
+		{"O_TRUNC", runtime.O_TRUNC, syscall.O_TRUNC},
+	}
+	for _, f := range flags {
+		if f.got != f.want {
+			t.Errorf("flag %s does not line up: got %d, want %d", f.name, f.got, f.want)
+		}
+	}
+}