From: Michael Anthony Knyszek Date: Mon, 22 May 2023 21:39:54 +0000 (+0000) Subject: runtime: fix race in BenchmarkSetType* benchmarks X-Git-Tag: go1.21rc1~168 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b71d43296f87b3318a81cb808881d314391d101a;p=gostls13.git runtime: fix race in BenchmarkSetType* benchmarks Currently the BenchmarkSetType* benchmarks are racy: they call heapBitsSetType on an allocation that might be in a span in-use for allocation on another P. Because heap bits are bits but are written byte-wise non-atomically (because a P assumes it has total ownership of a span's bits), two threads can race writing the same heap bitmap byte creating incorrect metadata. Fix this by forcing every value we're writing heap bits for into a large object. Large object spans will never be written to concurrently unless they're freed first. Also, while we're here, refactor the benchmarks a bit. Use generics to eliminate the reflect nastiness in gc_test.go, and pass b.ResetTimer down into the test to get slightly more accurate results. Fixes #60050. Change-Id: Ib7d6249b321963367c8c8ca88385386c8ae9af1c Reviewed-on: https://go-review.googlesource.com/c/go/+/497215 Reviewed-by: Austin Clements Run-TryBot: Michael Knyszek Auto-Submit: Michael Knyszek TryBot-Result: Gopher Robot --- diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 5dc32ac5f7..5641005401 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -230,34 +230,126 @@ func SetEnvs(e []string) { envs = e } // For benchmarking. -func BenchSetType(n int, x any) { +// blockWrapper is a wrapper type that ensures a T is placed within a +// large object. This is necessary for safely benchmarking things +// that manipulate the heap bitmap, like heapBitsSetType. +// +// More specifically, allocating threads assume they're the sole writers +// to their span's heap bits, which allows those writes to be non-atomic. +// The heap bitmap is written byte-wise, so if one tried to call heapBitsSetType +// on an existing object in a small object span, we might corrupt that +// span's bitmap with a concurrent byte write to the heap bitmap. Large +// object spans contain exactly one object, so we can be sure no other P +// is going to be allocating from it concurrently, hence this wrapper type +// which ensures we have a T in a large object span. +type blockWrapper[T any] struct { + value T + _ [_MaxSmallSize]byte // Ensure we're a large object. +} + +func BenchSetType[T any](n int, resetTimer func()) { + x := new(blockWrapper[T]) + // Escape x to ensure it is allocated on the heap, as we are // working on the heap bits here. Escape(x) - e := *efaceOf(&x) + + // Grab the type. + var i any = *new(T) + e := *efaceOf(&i) + t := e._type + + // Benchmark setting the type bits for just the internal T of the block. + benchSetType(n, resetTimer, 1, unsafe.Pointer(&x.value), t) +} + +const maxArrayBlockWrapperLen = 32 + +// arrayBlockWrapper is like blockWrapper, but the interior value is intended +// to be used as a backing store for a slice. +type arrayBlockWrapper[T any] struct { + value [maxArrayBlockWrapperLen]T + _ [_MaxSmallSize]byte // Ensure we're a large object. +} + +// arrayLargeBlockWrapper is like arrayBlockWrapper, but the interior array +// accommodates many more elements. +type arrayLargeBlockWrapper[T any] struct { + value [1024]T + _ [_MaxSmallSize]byte // Ensure we're a large object. +} + +func BenchSetTypeSlice[T any](n int, resetTimer func(), len int) { + // We have two separate cases here because we want to avoid + // tests on big types but relatively small slices to avoid generating + // an allocation that's really big. This will likely force a GC which will + // skew the test results. + var y unsafe.Pointer + if len <= maxArrayBlockWrapperLen { + x := new(arrayBlockWrapper[T]) + // Escape x to ensure it is allocated on the heap, as we are + // working on the heap bits here. + Escape(x) + y = unsafe.Pointer(&x.value[0]) + } else { + x := new(arrayLargeBlockWrapper[T]) + Escape(x) + y = unsafe.Pointer(&x.value[0]) + } + + // Grab the type. + var i any = *new(T) + e := *efaceOf(&i) t := e._type - var size uintptr - var p unsafe.Pointer - switch t.Kind_ & kindMask { - case kindPtr: - t = (*ptrtype)(unsafe.Pointer(t)).Elem - size = t.Size_ - p = e.data - case kindSlice: - slice := *(*struct { - ptr unsafe.Pointer - len, cap uintptr - })(e.data) - t = (*slicetype)(unsafe.Pointer(t)).Elem - size = t.Size_ * slice.len - p = slice.ptr + + // Benchmark setting the type for a slice created from the array + // of T within the arrayBlock. + benchSetType(n, resetTimer, len, y, t) +} + +// benchSetType is the implementation of the BenchSetType* functions. +// x must be len consecutive Ts allocated within a large object span (to +// avoid a race on the heap bitmap). +// +// Note: this function cannot be generic. It would get its type from one of +// its callers (BenchSetType or BenchSetTypeSlice) whose type parameters are +// set by a call in the runtime_test package. That means this function and its +// callers will get instantiated in the package that provides the type argument, +// i.e. runtime_test. However, we call a function on the system stack. In race +// mode the runtime package is usually left uninstrumented because e.g. g0 has +// no valid racectx, but if we're instantiated in the runtime_test package, +// we might accidentally cause runtime code to be incorrectly instrumented. +func benchSetType(n int, resetTimer func(), len int, x unsafe.Pointer, t *_type) { + // Compute the input sizes. + size := t.Size() * uintptr(len) + + // Validate this function's invariant. + s := spanOfHeap(uintptr(x)) + if s == nil { + panic("no heap span for input") + } + if s.spanclass.sizeclass() != 0 { + panic("span is not a large object span") } + + // Round up the size to the size class to make the benchmark a little more + // realistic. However, validate it, to make sure this is safe. allocSize := roundupsize(size) + if s.npages*pageSize < allocSize { + panic("backing span not large enough for benchmark") + } + + // Benchmark heapBitsSetType by calling it in a loop. This is safe because + // x is in a large object span. + resetTimer() systemstack(func() { for i := 0; i < n; i++ { - heapBitsSetType(uintptr(p), allocSize, size, t) + heapBitsSetType(uintptr(x), allocSize, size, t) } }) + + // Make sure x doesn't get freed, since we're taking a uintptr. + KeepAlive(x) } const PtrSize = goarch.PtrSize diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0b2c972d3f..bd01e36103 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -308,35 +308,35 @@ func TestGCTestPointerClass(t *testing.T) { } func BenchmarkSetTypePtr(b *testing.B) { - benchSetType(b, new(*byte)) + benchSetType[*byte](b) } func BenchmarkSetTypePtr8(b *testing.B) { - benchSetType(b, new([8]*byte)) + benchSetType[[8]*byte](b) } func BenchmarkSetTypePtr16(b *testing.B) { - benchSetType(b, new([16]*byte)) + benchSetType[[16]*byte](b) } func BenchmarkSetTypePtr32(b *testing.B) { - benchSetType(b, new([32]*byte)) + benchSetType[[32]*byte](b) } func BenchmarkSetTypePtr64(b *testing.B) { - benchSetType(b, new([64]*byte)) + benchSetType[[64]*byte](b) } func BenchmarkSetTypePtr126(b *testing.B) { - benchSetType(b, new([126]*byte)) + benchSetType[[126]*byte](b) } func BenchmarkSetTypePtr128(b *testing.B) { - benchSetType(b, new([128]*byte)) + benchSetType[[128]*byte](b) } func BenchmarkSetTypePtrSlice(b *testing.B) { - benchSetType(b, make([]*byte, 1<<10)) + benchSetTypeSlice[*byte](b, 1<<10) } type Node1 struct { @@ -345,11 +345,11 @@ type Node1 struct { } func BenchmarkSetTypeNode1(b *testing.B) { - benchSetType(b, new(Node1)) + benchSetType[Node1](b) } func BenchmarkSetTypeNode1Slice(b *testing.B) { - benchSetType(b, make([]Node1, 32)) + benchSetTypeSlice[Node1](b, 32) } type Node8 struct { @@ -358,11 +358,11 @@ type Node8 struct { } func BenchmarkSetTypeNode8(b *testing.B) { - benchSetType(b, new(Node8)) + benchSetType[Node8](b) } func BenchmarkSetTypeNode8Slice(b *testing.B) { - benchSetType(b, make([]Node8, 32)) + benchSetTypeSlice[Node8](b, 32) } type Node64 struct { @@ -371,11 +371,11 @@ type Node64 struct { } func BenchmarkSetTypeNode64(b *testing.B) { - benchSetType(b, new(Node64)) + benchSetType[Node64](b) } func BenchmarkSetTypeNode64Slice(b *testing.B) { - benchSetType(b, make([]Node64, 32)) + benchSetTypeSlice[Node64](b, 32) } type Node64Dead struct { @@ -384,11 +384,11 @@ type Node64Dead struct { } func BenchmarkSetTypeNode64Dead(b *testing.B) { - benchSetType(b, new(Node64Dead)) + benchSetType[Node64Dead](b) } func BenchmarkSetTypeNode64DeadSlice(b *testing.B) { - benchSetType(b, make([]Node64Dead, 32)) + benchSetTypeSlice[Node64Dead](b, 32) } type Node124 struct { @@ -397,11 +397,11 @@ type Node124 struct { } func BenchmarkSetTypeNode124(b *testing.B) { - benchSetType(b, new(Node124)) + benchSetType[Node124](b) } func BenchmarkSetTypeNode124Slice(b *testing.B) { - benchSetType(b, make([]Node124, 32)) + benchSetTypeSlice[Node124](b, 32) } type Node126 struct { @@ -410,11 +410,11 @@ type Node126 struct { } func BenchmarkSetTypeNode126(b *testing.B) { - benchSetType(b, new(Node126)) + benchSetType[Node126](b) } func BenchmarkSetTypeNode126Slice(b *testing.B) { - benchSetType(b, make([]Node126, 32)) + benchSetTypeSlice[Node126](b, 32) } type Node128 struct { @@ -423,11 +423,11 @@ type Node128 struct { } func BenchmarkSetTypeNode128(b *testing.B) { - benchSetType(b, new(Node128)) + benchSetType[Node128](b) } func BenchmarkSetTypeNode128Slice(b *testing.B) { - benchSetType(b, make([]Node128, 32)) + benchSetTypeSlice[Node128](b, 32) } type Node130 struct { @@ -436,11 +436,11 @@ type Node130 struct { } func BenchmarkSetTypeNode130(b *testing.B) { - benchSetType(b, new(Node130)) + benchSetType[Node130](b) } func BenchmarkSetTypeNode130Slice(b *testing.B) { - benchSetType(b, make([]Node130, 32)) + benchSetTypeSlice[Node130](b, 32) } type Node1024 struct { @@ -449,24 +449,21 @@ type Node1024 struct { } func BenchmarkSetTypeNode1024(b *testing.B) { - benchSetType(b, new(Node1024)) + benchSetType[Node1024](b) } func BenchmarkSetTypeNode1024Slice(b *testing.B) { - benchSetType(b, make([]Node1024, 32)) + benchSetTypeSlice[Node1024](b, 32) } -func benchSetType(b *testing.B, x any) { - v := reflect.ValueOf(x) - t := v.Type() - switch t.Kind() { - case reflect.Pointer: - b.SetBytes(int64(t.Elem().Size())) - case reflect.Slice: - b.SetBytes(int64(t.Elem().Size()) * int64(v.Len())) - } - b.ResetTimer() - runtime.BenchSetType(b.N, x) +func benchSetType[T any](b *testing.B) { + b.SetBytes(int64(unsafe.Sizeof(*new(T)))) + runtime.BenchSetType[T](b.N, b.ResetTimer) +} + +func benchSetTypeSlice[T any](b *testing.B, len int) { + b.SetBytes(int64(unsafe.Sizeof(*new(T)) * uintptr(len))) + runtime.BenchSetTypeSlice[T](b.N, b.ResetTimer, len) } func BenchmarkAllocation(b *testing.B) {