From: Jason A. Donenfeld
Date: Sun, 29 Sep 2024 02:24:00 +0000 (+0200)
Subject: runtime: align vgetrandom states to cache line
X-Git-Tag: go1.24rc1~796
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=86781963cc2bf0d3dbd1eccebd8a2f080846f3ee;p=gostls13.git

runtime: align vgetrandom states to cache line

This prevents false sharing, which makes a large difference on machines
with several NUMA nodes, such as this dual socket server:

cpu: Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
                        │    sec/op    │    sec/op     vs base                │
ParallelGetRandom-128     0.7944n ± 5%   0.4503n ± 0%  -43.31% (p=0.000 n=10)

                        │     B/s      │      B/s      vs base                │
ParallelGetRandom-128     4.690Gi ± 5%   8.272Gi ± 0%  +76.38% (p=0.000 n=10)

Change-Id: Id4421e9a4c190b38aff0be4c59e9067b0a38ccd7
Reviewed-on: https://go-review.googlesource.com/c/go/+/616535
LUCI-TryBot-Result: Go LUCI
Auto-Submit: Jason Donenfeld
Reviewed-by: David Chase
Reviewed-by: Michael Pratt
---

diff --git a/src/internal/syscall/unix/getrandom_linux_test.go b/src/internal/syscall/unix/getrandom_linux_test.go
new file mode 100644
index 0000000000..e1778c19e8
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom_linux_test.go
@@ -0,0 +1,22 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix_test
+
+import (
+	"internal/syscall/unix"
+	"testing"
+)
+
+func BenchmarkParallelGetRandom(b *testing.B) {
+	b.SetBytes(4)
+	b.RunParallel(func(pb *testing.PB) {
+		var buf [4]byte
+		for pb.Next() {
+			if _, err := unix.GetRandom(buf[:], 0); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
diff --git a/src/runtime/vgetrandom_linux.go b/src/runtime/vgetrandom_linux.go
index 1e8c8ceaf0..af0e9cad1e 100644
--- a/src/runtime/vgetrandom_linux.go
+++ b/src/runtime/vgetrandom_linux.go
@@ -6,7 +6,10 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"internal/cpu"
+	"unsafe"
+)
 
 func vgetrandom1(buf *byte, length uintptr, flags uint32, state uintptr, stateSize uintptr) int
 
@@ -43,8 +46,9 @@ func vgetrandomGetState() uintptr {
 	lock(&vgetrandomAlloc.statesLock)
 	if len(vgetrandomAlloc.states) == 0 {
 		num := uintptr(ncpu) // Just a reasonable size hint to start.
-		allocSize := (num*vgetrandomAlloc.stateSize + physPageSize - 1) &^ (physPageSize - 1)
-		num = (physPageSize / vgetrandomAlloc.stateSize) * (allocSize / physPageSize)
+		stateSizeCacheAligned := (vgetrandomAlloc.stateSize + cpu.CacheLineSize - 1) &^ (cpu.CacheLineSize - 1)
+		allocSize := (num*stateSizeCacheAligned + physPageSize - 1) &^ (physPageSize - 1)
+		num = (physPageSize / stateSizeCacheAligned) * (allocSize / physPageSize)
 		p, err := mmap(nil, allocSize, vgetrandomAlloc.mmapProt, vgetrandomAlloc.mmapFlags, -1, 0)
 		if err != 0 {
 			unlock(&vgetrandomAlloc.statesLock)
@@ -59,7 +63,7 @@ func vgetrandomGetState() uintptr {
 			newBlock = (newBlock + physPageSize - 1) &^ (physPageSize - 1)
 		}
 		vgetrandomAlloc.states = append(vgetrandomAlloc.states, newBlock)
-		newBlock += vgetrandomAlloc.stateSize
+		newBlock += stateSizeCacheAligned
 	}
 	state := vgetrandomAlloc.states[len(vgetrandomAlloc.states)-1]
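
For illustration only (not part of the commit): a minimal, standalone sketch of the align-up
arithmetic the patch applies, assuming a 64-byte cache line and a hypothetical 144-byte state
size. The runtime itself uses internal/cpu.CacheLineSize and the vgetrandom state size reported
by the vDSO; the names below (alignUp, cacheLineSize, stateSize) are illustrative only.

package main

import "fmt"

const cacheLineSize = 64 // assumed; the runtime uses internal/cpu.CacheLineSize for the target arch

// alignUp rounds n up to the next multiple of align, which must be a power of two.
// This is the same (x + a - 1) &^ (a - 1) idiom used in the hunk above.
func alignUp(n, align uintptr) uintptr {
	return (n + align - 1) &^ (align - 1)
}

func main() {
	stateSize := uintptr(144) // hypothetical per-thread vgetrandom state size
	aligned := alignUp(stateSize, cacheLineSize)
	fmt.Printf("stateSize=%d aligned=%d\n", stateSize, aligned) // 144 -> 192

	// With the rounded-up stride, state i begins at i*aligned, so consecutive
	// states start on distinct cache lines and cannot falsely share one.
	for i := uintptr(0); i < 3; i++ {
		fmt.Printf("state %d offset: %d\n", i, i*aligned)
	}
}

The trade-off is a little wasted space per state in exchange for keeping each opaque state on
its own cache line, which is what removes the cross-socket false sharing measured above.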