It appears that the Windows osyield is just a 15ms sleep on my computer
(see benchmarks below). Replace NtWaitForSingleObject in osyield
with SwitchToThread (as suggested by Dmitry).
Also add benchmarks related to issue #14790, so we can track performance
changes in CL 20834 and CL 20835 and beyond.
Update #14790
benchmark                             old ns/op     new ns/op     delta
BenchmarkChanToSyscallPing1ms         1953200       1953000       -0.01%
BenchmarkChanToSyscallPing15ms        31562904      31248400      -1.00%
BenchmarkSyscallToSyscallPing1ms      5247          4202          -19.92%
BenchmarkSyscallToSyscallPing15ms     5260          4374          -16.84%
BenchmarkChanToChanPing1ms            474           494           +4.22%
BenchmarkChanToChanPing15ms           468           489           +4.49%
BenchmarkOsYield1ms                   980018        75.5          -99.99%
BenchmarkOsYield15ms                  15625200      75.8          -100.00%
Change-Id: I1b4cc7caca784e2548ee3c846ca07ef152ebedce
Reviewed-on: https://go-review.googlesource.com/21294
Run-TryBot: Alex Brainman <alex.brainman@gmail.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
import "unsafe"
var TestingWER = &testingWER
+// OsYield exposes the unexported osyield so that code outside the package
+// (such as the benchmarks calling runtime.OsYield) can invoke it.
+var OsYield = osyield
func NumberOfProcessors() int32 {
var info systeminfo
//go:cgo_import_dynamic runtime._SetUnhandledExceptionFilter SetUnhandledExceptionFilter%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer%6 "kernel32.dll"
//go:cgo_import_dynamic runtime._SuspendThread SuspendThread%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SwitchToThread SwitchToThread%0 "kernel32.dll"
//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc%4 "kernel32.dll"
//go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult%5 "ws2_32.dll"
_SetUnhandledExceptionFilter,
_SetWaitableTimer,
_SuspendThread,
+ _SwitchToThread,
_VirtualAlloc,
_VirtualFree,
_WSAGetOverlappedResult,
func osinit() {
asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
+ usleep2Addr = unsafe.Pointer(funcPC(usleep2))
+ switchtothreadAddr = unsafe.Pointer(funcPC(switchtothread))
setBadSignalMsg()
}
// in sys_windows_386.s and sys_windows_amd64.s
-func usleep1(usec uint32)
+// onosstack calls fn on the OS stack, with arg made available to it.
+func onosstack(fn unsafe.Pointer, arg uint32)
+// usleep2 is invoked by usleep through onosstack; the value usleep hands
+// over is a duration in 100ns units.
+func usleep2(usec uint32)
+// switchtothread calls the imported Windows SwitchToThread function.
+// It runs on the OS stack.
+func switchtothread()
+
+// usleep2Addr and switchtothreadAddr hold the entry addresses of usleep2 and
+// switchtothread (assigned in osinit) so they can be passed to onosstack.
+var usleep2Addr unsafe.Pointer
+var switchtothreadAddr unsafe.Pointer
+// osyield runs switchtothread, which calls the Windows SwitchToThread
+// function, on the OS stack. switchtothread does not read the argument
+// that onosstack passes along, so 0 is supplied.
//go:nosplit
func osyield() {
- usleep1(1)
+ onosstack(switchtothreadAddr, 0)
}
+// usleep runs usleep2 on the OS stack, passing the duration us (given in
+// microseconds) converted to 100ns units.
+// NOTE(review): 10*us is evaluated in uint32 and wraps for us > 429496729;
+// confirm that callers stay below that.
//go:nosplit
func usleep(us uint32) {
 // Have 1us units; want 100ns units.
- usleep1(10 * us)
+ onosstack(usleep2Addr, 10*us)
}
func ctrlhandler1(_type uint32) uint32 {
MOVL CX, 0x14(FS)
RET
-// Sleep duration is in 100ns units.
-TEXT runtime·usleep1(SB),NOSPLIT,$0
- MOVL usec+0(FP), BX
- MOVL $runtime·usleep2(SB), AX // to hide from 8l
+// onosstack calls fn on OS stack.
+// func onosstack(fn unsafe.Pointer, arg uint32)
+TEXT runtime·onosstack(SB),NOSPLIT,$0
+ MOVL fn+0(FP), AX // to hide from 8l
+ MOVL arg+4(FP), BX
// Execute call on m->g0 stack, in case we are not actually
// calling a system call wrapper, like when running under WINE.
MOVL BP, SP
RET
+// Runs on OS stack.
+// func switchtothread()
+// Calls the imported SwitchToThread function, which takes no arguments
+// (it is imported as SwitchToThread%0).
+TEXT runtime·switchtothread(SB),NOSPLIT,$0
+ MOVL SP, BP // keep the incoming SP in BP across the call
+ MOVL runtime·_SwitchToThread(SB), AX // load the imported function pointer
+ CALL AX
+ MOVL BP, SP // restore the SP saved above
+ RET
+
// func now() (sec int64, nsec int32)
TEXT time·now(SB),NOSPLIT,$8-12
CALL runtime·unixnano(SB)
MOVQ DI, 0x28(GS)
RET
-// Sleep duration is in 100ns units.
-TEXT runtime·usleep1(SB),NOSPLIT,$0
- MOVL usec+0(FP), BX
- MOVQ $runtime·usleep2(SB), AX // to hide from 6l
+// func onosstack(fn unsafe.Pointer, arg uint32)
+TEXT runtime·onosstack(SB),NOSPLIT,$0
+ MOVQ fn+0(FP), AX // to hide from 6l
+ MOVL arg+8(FP), BX
// Execute call on m->g0 stack, in case we are not actually
// calling a system call wrapper, like when running under WINE.
MOVQ 40(SP), SP
RET
+// Runs on OS stack.
+// func switchtothread()
+// Calls the imported SwitchToThread function on a 16-byte aligned frame and
+// then restores the caller's stack pointer.
+TEXT runtime·switchtothread(SB),NOSPLIT,$0
+ MOVQ SP, AX // remember the incoming SP
+ ANDQ $~15, SP // alignment as per Windows requirement
+ SUBQ $(48), SP // room for SP and 4 args as per Windows requirement
+ // plus one extra word to keep stack 16 bytes aligned
+ MOVQ AX, 32(SP) // stash the incoming SP just above the 4 argument slots
+ MOVQ runtime·_SwitchToThread(SB), AX // load the imported function pointer
+ CALL AX
+ MOVQ 32(SP), SP // restore the incoming SP stashed above
+ RET
+
// func now() (sec int64, nsec int32)
TEXT time·now(SB),NOSPLIT,$8-12
CALL runtime·unixnano(SB)
t.Skipf("LoadLibraryEx not usable, but not expected. (LoadLibraryEx=%v; flags=%v)",
have, flags)
}
+
+var (
+ // DLLs that provide the procedures used by the benchmark helpers below.
+ modwinmm = syscall.NewLazyDLL("winmm.dll")
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+ // Timer period control, used by timeBeginPeriod and timeEndPeriod.
+ proctimeBeginPeriod = modwinmm.NewProc("timeBeginPeriod")
+ proctimeEndPeriod = modwinmm.NewProc("timeEndPeriod")
+
+ // Event objects, used by createEvent and setEvent.
+ procCreateEvent = modkernel32.NewProc("CreateEventW")
+ procSetEvent = modkernel32.NewProc("SetEvent")
+)
+
+// timeBeginPeriod invokes the timeBeginPeriod procedure from winmm.dll with
+// the given period as its single argument. Whatever the call returns is
+// discarded.
+func timeBeginPeriod(period uint32) {
+	addr := proctimeBeginPeriod.Addr()
+	syscall.Syscall(addr, 1, uintptr(period), 0, 0)
+}
+
+// timeEndPeriod invokes the timeEndPeriod procedure from winmm.dll with the
+// given period as its single argument. Whatever the call returns is
+// discarded.
+func timeEndPeriod(period uint32) {
+	addr := proctimeEndPeriod.Addr()
+	syscall.Syscall(addr, 1, uintptr(period), 0, 0)
+}
+
+// createEvent calls CreateEventW with all four arguments zero (no security
+// attributes, auto-reset, initially non-signaled, unnamed) and returns the
+// new event handle.
+//
+// On failure it returns the errno reported by the call. If the call failed
+// without setting an errno, syscall.EINVAL is returned instead, so that a
+// failure is never reported through an error value that reads as success.
+func createEvent() (syscall.Handle, error) {
+	r0, _, e0 := syscall.Syscall6(procCreateEvent.Addr(), 4, 0, 0, 0, 0, 0, 0)
+	if r0 == 0 {
+		if e0 != 0 {
+			return 0, e0
+		}
+		return 0, syscall.EINVAL
+	}
+	return syscall.Handle(r0), nil
+}
+
+// setEvent calls SetEvent on the event handle h.
+//
+// It returns nil on success. On failure it returns the errno reported by the
+// call, or syscall.EINVAL when the call failed without setting an errno, so
+// that a failure is never reported through an error value that reads as
+// success.
+func setEvent(h syscall.Handle) error {
+	r0, _, e0 := syscall.Syscall(procSetEvent.Addr(), 1, uintptr(h), 0, 0)
+	if r0 == 0 {
+		if e0 != 0 {
+			return e0
+		}
+		return syscall.EINVAL
+	}
+	return nil
+}
+
+// benchChanToSyscallPing measures b.N round trips between the benchmark
+// goroutine and a helper goroutine: the benchmark goroutine signals a Windows
+// event, the helper wakes up from WaitForSingleObject on that event and
+// answers over an unbuffered channel.
+//
+// The event handle is closed once all round trips have completed, so repeated
+// invocations (the testing package calls the benchmark with growing b.N) do
+// not accumulate open handles.
+func benchChanToSyscallPing(b *testing.B) {
+	ch := make(chan int)
+	event, err := createEvent()
+	if err != nil {
+		b.Fatal(err)
+	}
+	go func() {
+		for i := 0; i < b.N; i++ {
+			syscall.WaitForSingleObject(event, syscall.INFINITE)
+			ch <- 1
+		}
+	}()
+	for i := 0; i < b.N; i++ {
+		if err := setEvent(event); err != nil {
+			b.Fatal(err)
+		}
+		<-ch
+	}
+	// The helper has sent its final reply, so it is past its last wait on
+	// event. Only now is it safe to close the handle; it is deliberately not
+	// closed on the b.Fatal path above, where the helper may still be
+	// waiting on it.
+	b.StopTimer()
+	if err := syscall.CloseHandle(event); err != nil {
+		b.Error(err)
+	}
+}
+
+// BenchmarkChanToSyscallPing1ms runs benchChanToSyscallPing after calling
+// timeBeginPeriod(1).
+func BenchmarkChanToSyscallPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	// Deferred so the matching timeEndPeriod also runs when the helper leaves
+	// through b.Fatal (which exits via runtime.Goexit and still runs deferred
+	// calls). Otherwise the 1ms request would stay in force for every
+	// benchmark that runs afterwards.
+	defer timeEndPeriod(1)
+	benchChanToSyscallPing(b)
+}
+
+// BenchmarkChanToSyscallPing15ms runs benchChanToSyscallPing without calling
+// timeBeginPeriod, i.e. with whatever timer period is currently in effect
+// (presumably the 15ms default that the name refers to).
+func BenchmarkChanToSyscallPing15ms(b *testing.B) {
+ benchChanToSyscallPing(b)
+}
+
+// benchSyscallToSyscallPing measures b.N round trips between the benchmark
+// goroutine and a helper goroutine that communicate only through two Windows
+// events: the benchmark goroutine signals event1 and waits on event2, the
+// helper waits on event1 and signals event2.
+//
+// Both event handles are closed once all round trips have completed, so
+// repeated invocations do not accumulate open handles.
+func benchSyscallToSyscallPing(b *testing.B) {
+	event1, err := createEvent()
+	if err != nil {
+		b.Fatal(err)
+	}
+	event2, err := createEvent()
+	if err != nil {
+		// Nothing is using event1 yet, so release it before bailing out.
+		syscall.CloseHandle(event1)
+		b.Fatal(err)
+	}
+	go func() {
+		for i := 0; i < b.N; i++ {
+			syscall.WaitForSingleObject(event1, syscall.INFINITE)
+			if err := setEvent(event2); err != nil {
+				// b.Fatal must only be called from the goroutine running
+				// the benchmark, so record the failure with b.Error and
+				// stop the helper. The benchmark goroutine then stays
+				// blocked on event2, exactly as it did when this goroutine
+				// used to exit through b.Fatal.
+				b.Error(err)
+				return
+			}
+		}
+	}()
+	for i := 0; i < b.N; i++ {
+		if err := setEvent(event1); err != nil {
+			b.Fatal(err)
+		}
+		syscall.WaitForSingleObject(event2, syscall.INFINITE)
+	}
+	// The final wait on event2 returned, so the helper has finished its last
+	// wait on event1 and its last SetEvent on event2. The handles are closed
+	// only here, not on the b.Fatal path, where the helper may still be
+	// waiting on event1.
+	b.StopTimer()
+	if err := syscall.CloseHandle(event1); err != nil {
+		b.Error(err)
+	}
+	if err := syscall.CloseHandle(event2); err != nil {
+		b.Error(err)
+	}
+}
+
+// BenchmarkSyscallToSyscallPing1ms runs benchSyscallToSyscallPing after
+// calling timeBeginPeriod(1).
+func BenchmarkSyscallToSyscallPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	// Deferred so the matching timeEndPeriod also runs when the helper leaves
+	// through b.Fatal (which exits via runtime.Goexit and still runs deferred
+	// calls). Otherwise the 1ms request would stay in force for every
+	// benchmark that runs afterwards.
+	defer timeEndPeriod(1)
+	benchSyscallToSyscallPing(b)
+}
+
+// BenchmarkSyscallToSyscallPing15ms runs benchSyscallToSyscallPing without
+// calling timeBeginPeriod, i.e. with whatever timer period is currently in
+// effect (presumably the 15ms default that the name refers to).
+func BenchmarkSyscallToSyscallPing15ms(b *testing.B) {
+ benchSyscallToSyscallPing(b)
+}
+
+// benchChanToChanPing bounces a value between the calling goroutine and a
+// helper goroutine over two unbuffered channels, b.N round trips in total.
+func benchChanToChanPing(b *testing.B) {
+	ping, pong := make(chan int), make(chan int)
+	// echo answers every value received on ping with one value on pong.
+	echo := func() {
+		for n := 0; n < b.N; n++ {
+			<-ping
+			pong <- 1
+		}
+	}
+	go echo()
+	for n := 0; n < b.N; n++ {
+		ping <- 1
+		<-pong
+	}
+}
+
+// BenchmarkChanToChanPing1ms runs benchChanToChanPing after calling
+// timeBeginPeriod(1).
+func BenchmarkChanToChanPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	// Deferred so the matching timeEndPeriod runs on every exit path,
+	// including a panic or runtime.Goexit inside the helper. Otherwise the
+	// 1ms request would stay in force for every benchmark that runs
+	// afterwards.
+	defer timeEndPeriod(1)
+	benchChanToChanPing(b)
+}
+
+// BenchmarkChanToChanPing15ms runs benchChanToChanPing without calling
+// timeBeginPeriod, i.e. with whatever timer period is currently in effect
+// (presumably the 15ms default that the name refers to).
+func BenchmarkChanToChanPing15ms(b *testing.B) {
+ benchChanToChanPing(b)
+}
+
+// benchOsYield calls runtime.OsYield b.N times back to back.
+func benchOsYield(b *testing.B) {
+	for remaining := b.N; remaining > 0; remaining-- {
+		runtime.OsYield()
+	}
+}
+
+// BenchmarkOsYield1ms runs benchOsYield after calling timeBeginPeriod(1).
+func BenchmarkOsYield1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	// Deferred so the matching timeEndPeriod runs on every exit path,
+	// including a panic or runtime.Goexit inside the helper. Otherwise the
+	// 1ms request would stay in force for every benchmark that runs
+	// afterwards.
+	defer timeEndPeriod(1)
+	benchOsYield(b)
+}
+
+// BenchmarkOsYield15ms runs benchOsYield without calling timeBeginPeriod,
+// i.e. with whatever timer period is currently in effect (presumably the
+// 15ms default that the name refers to).
+func BenchmarkOsYield15ms(b *testing.B) {
+ benchOsYield(b)
+}