From 35fa852d6dda69ce367e449fff78b3fc09834c97 Mon Sep 17 00:00:00 2001 From: qmuntal Date: Mon, 12 Feb 2024 12:38:37 +0100 Subject: [PATCH] runtime: use the right number of parameters in syscall_SyscallX on Windows The syscall_SyscallX functions currently discard the nargs parameter when calling syscall_SyscallN. This precludes some optimizations down the line. For example, on amd64, a syscall that takes 0 arguments don't need to set any of the params passing registers (CX, DX, R8, and R9). This CL updates all syscall_SyscallX functions so they call syscall_SyscallN with an argument slice of the right length. While here, remove the hack in syscall_SyscallN to support less than 4 arguments, and update instead asmstdcall on amd64 to properly handle this case. Change-Id: I0328e14f34c2b000fde06cc6a579b09e8c32f2b9 Reviewed-on: https://go-review.googlesource.com/c/go/+/563315 TryBot-Result: Gopher Robot LUCI-TryBot-Result: Go LUCI Run-TryBot: Quim Muntal Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Than McIntosh --- src/runtime/sys_windows_amd64.s | 33 ++++++------ src/runtime/syscall_windows.go | 93 ++++++++++++--------------------- 2 files changed, 51 insertions(+), 75 deletions(-) diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index c1b78e3976..56a2dc0bcf 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -33,14 +33,12 @@ TEXT runtime·asmstdcall(SB),NOSPLIT,$16 SUBQ $(const_maxArgs*8), SP // room for args - // Fast version, do not store args on the stack nor - // load them into registers. - CMPL CX, $0 - JE docall - // Fast version, do not store args on the stack. - CMPL CX, $4 - JLE loadregs + CMPL CX, $0; JE _0args + CMPL CX, $1; JE _1args + CMPL CX, $2; JE _2args + CMPL CX, $3; JE _3args + CMPL CX, $4; JE _4args // Check we have enough room for args. CMPL CX, $const_maxArgs @@ -53,22 +51,25 @@ TEXT runtime·asmstdcall(SB),NOSPLIT,$16 REP; MOVSQ MOVQ SP, SI -loadregs: // Load first 4 args into correspondent registers. - MOVQ 0(SI), CX - MOVQ 8(SI), DX - MOVQ 16(SI), R8 - MOVQ 24(SI), R9 // Floating point arguments are passed in the XMM // registers. Set them here in case any of the arguments // are floating point values. For details see // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170 - MOVQ CX, X0 - MOVQ DX, X1 - MOVQ R8, X2 +_4args: + MOVQ 24(SI), R9 MOVQ R9, X3 +_3args: + MOVQ 16(SI), R8 + MOVQ R8, X2 +_2args: + MOVQ 8(SI), DX + MOVQ DX, X1 +_1args: + MOVQ 0(SI), CX + MOVQ CX, X0 +_0args: -docall: // Call stdcall function. CALL AX diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index ba88e93d7d..7abaea11c8 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -415,63 +415,36 @@ const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 //go:linkname syscall_loadsystemlibrary syscall.loadsystemlibrary //go:nosplit -//go:cgo_unsafe_args func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) { - lockOSThread() - c := &getg().m.syscall - c.fn = getLoadLibraryEx() - c.n = 3 - args := struct { - lpFileName *uint16 - hFile uintptr // always 0 - flags uint32 - }{filename, 0, _LOAD_LIBRARY_SEARCH_SYSTEM32} - c.args = uintptr(noescape(unsafe.Pointer(&args))) - - cgocall(asmstdcallAddr, unsafe.Pointer(c)) + fn := getLoadLibraryEx() + handle, _, err = syscall_SyscallN(fn, uintptr(unsafe.Pointer(filename)), 0, _LOAD_LIBRARY_SEARCH_SYSTEM32) KeepAlive(filename) - handle = c.r1 - if handle == 0 { - err = c.err + if handle != 0 { + err = 0 } - unlockOSThread() // not defer'd after the lockOSThread above to save stack frame size. return } //go:linkname syscall_loadlibrary syscall.loadlibrary //go:nosplit -//go:cgo_unsafe_args func syscall_loadlibrary(filename *uint16) (handle, err uintptr) { - lockOSThread() - defer unlockOSThread() - c := &getg().m.syscall - c.fn = getLoadLibrary() - c.n = 1 - c.args = uintptr(noescape(unsafe.Pointer(&filename))) - cgocall(asmstdcallAddr, unsafe.Pointer(c)) + fn := getLoadLibrary() + handle, _, err = syscall_SyscallN(fn, uintptr(unsafe.Pointer(filename))) KeepAlive(filename) - handle = c.r1 - if handle == 0 { - err = c.err + if handle != 0 { + err = 0 } return } //go:linkname syscall_getprocaddress syscall.getprocaddress //go:nosplit -//go:cgo_unsafe_args func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uintptr) { - lockOSThread() - defer unlockOSThread() - c := &getg().m.syscall - c.fn = getGetProcAddress() - c.n = 2 - c.args = uintptr(noescape(unsafe.Pointer(&handle))) - cgocall(asmstdcallAddr, unsafe.Pointer(c)) + fn := getGetProcAddress() + outhandle, _, err = syscall_SyscallN(fn, handle, uintptr(unsafe.Pointer(procname))) KeepAlive(procname) - outhandle = c.r1 - if outhandle == 0 { - err = c.err + if outhandle != 0 { + err = 0 } return } @@ -479,37 +452,43 @@ func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uint //go:linkname syscall_Syscall syscall.Syscall //go:nosplit func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3) + args := [...]uintptr{a1, a2, a3} + return syscall_SyscallN(fn, args[:nargs]...) } //go:linkname syscall_Syscall6 syscall.Syscall6 //go:nosplit func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3, a4, a5, a6) + args := [...]uintptr{a1, a2, a3, a4, a5, a6} + return syscall_SyscallN(fn, args[:nargs]...) } //go:linkname syscall_Syscall9 syscall.Syscall9 //go:nosplit func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9) + args := [...]uintptr{a1, a2, a3, a4, a5, a6, a7, a8, a9} + return syscall_SyscallN(fn, args[:nargs]...) } //go:linkname syscall_Syscall12 syscall.Syscall12 //go:nosplit func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) + args := [...]uintptr{a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12} + return syscall_SyscallN(fn, args[:nargs]...) } //go:linkname syscall_Syscall15 syscall.Syscall15 //go:nosplit func syscall_Syscall15(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) + args := [...]uintptr{a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15} + return syscall_SyscallN(fn, args[:nargs]...) } //go:linkname syscall_Syscall18 syscall.Syscall18 //go:nosplit func syscall_Syscall18(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18 uintptr) (r1, r2, err uintptr) { - return syscall_SyscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18) + args := [...]uintptr{a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18} + return syscall_SyscallN(fn, args[:nargs]...) } // maxArgs should be divisible by 2, as Windows stack @@ -521,26 +500,22 @@ const maxArgs = 42 //go:linkname syscall_SyscallN syscall.SyscallN //go:nosplit -func syscall_SyscallN(trap uintptr, args ...uintptr) (r1, r2, err uintptr) { - nargs := len(args) - - // asmstdcall expects it can access the first 4 arguments - // to load them into registers. - var tmp [4]uintptr - switch { - case nargs < 4: - copy(tmp[:], args) - args = tmp[:] - case nargs > maxArgs: +func syscall_SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + if len(args) > maxArgs { panic("runtime: SyscallN has too many arguments") } + // The cgocall parameters are stored in m instead of in + // the stack because the stack can move during if fn + // calls back into Go. lockOSThread() defer unlockOSThread() c := &getg().m.syscall - c.fn = trap - c.n = uintptr(nargs) - c.args = uintptr(noescape(unsafe.Pointer(&args[0]))) + c.fn = fn + c.n = uintptr(len(args)) + if c.n != 0 { + c.args = uintptr(noescape(unsafe.Pointer(&args[0]))) + } cgocall(asmstdcallAddr, unsafe.Pointer(c)) return c.r1, c.r2, c.err } -- 2.48.1