From e347c89598d502dd82a3dbfc93c7d004e6d91f9d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 3 Nov 2021 18:19:04 +0100 Subject: [PATCH] [release-branch.go1.17] runtime: on windows, read nanotime with one instruction or issue barrier On 64-bit, this is more efficient, and on ARM64, this prevents the time from moving backwards due to the weaker memory model. On ARM32 due to the weaker memory model, we issue a memory barrier. Updates #48072. Updates #49369. Change-Id: If4695716c3039d8af14e14808af217f5c99fc93a Reviewed-on: https://go-review.googlesource.com/c/go/+/361057 Trust: Jason A. Donenfeld Run-TryBot: Jason A. Donenfeld TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-on: https://go-review.googlesource.com/c/go/+/361475 Reviewed-by: Patrik Nyblom --- src/runtime/sys_windows_amd64.s | 13 +++---------- src/runtime/sys_windows_arm.s | 2 ++ src/runtime/sys_windows_arm64.s | 10 +--------- src/runtime/time_windows.h | 1 + src/runtime/time_windows_amd64.s | 19 +++---------------- src/runtime/time_windows_arm.s | 4 ++++ src/runtime/time_windows_arm64.s | 22 +++------------------- 7 files changed, 17 insertions(+), 54 deletions(-) diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index e7782846b2..64fa6791f4 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -348,16 +348,9 @@ TEXT runtime·nanotime1(SB),NOSPLIT,$0-8 CMPB runtime·useQPCTime(SB), $0 JNE useQPC MOVQ $_INTERRUPT_TIME, DI -loop: - MOVL time_hi1(DI), AX - MOVL time_lo(DI), BX - MOVL time_hi2(DI), CX - CMPL AX, CX - JNE loop - SHLQ $32, CX - ORQ BX, CX - IMULQ $100, CX - MOVQ CX, ret+0(FP) + MOVQ time_lo(DI), AX + IMULQ $100, AX + MOVQ AX, ret+0(FP) RET useQPC: JMP runtime·nanotimeQPC(SB) diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 48f8c7dedf..d7ad244161 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -350,7 +350,9 @@ TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 MOVW $_INTERRUPT_TIME, R3 loop: MOVW time_hi1(R3), R1 + DMB MB_ISH MOVW time_lo(R3), R0 + DMB MB_ISH MOVW time_hi2(R3), R2 CMP R1, R2 BNE loop diff --git a/src/runtime/sys_windows_arm64.s b/src/runtime/sys_windows_arm64.s index 7a2e11f5ae..183128dd09 100644 --- a/src/runtime/sys_windows_arm64.s +++ b/src/runtime/sys_windows_arm64.s @@ -415,15 +415,7 @@ TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 CMP $0, R0 BNE useQPC MOVD $_INTERRUPT_TIME, R3 -loop: - MOVWU time_hi1(R3), R1 - MOVWU time_lo(R3), R0 - MOVWU time_hi2(R3), R2 - CMP R1, R2 - BNE loop - - // wintime = R1:R0, multiply by 100 - ORR R1<<32, R0 + MOVD time_lo(R3), R0 MOVD $100, R1 MUL R1, R0 MOVD R0, ret+0(FP) diff --git a/src/runtime/time_windows.h b/src/runtime/time_windows.h index cd16fd163b..7c2e65c328 100644 --- a/src/runtime/time_windows.h +++ b/src/runtime/time_windows.h @@ -9,6 +9,7 @@ // http://web.archive.org/web/20210411000829/https://wrkhpi.wordpress.com/2007/08/09/getting-os-information-the-kuser_shared_data-structure/ // Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2. +// Or, on 64-bit, just read lo:hi1 all at once atomically. #define _INTERRUPT_TIME 0x7ffe0008 #define _SYSTEM_TIME 0x7ffe0014 #define time_lo 0 diff --git a/src/runtime/time_windows_amd64.s b/src/runtime/time_windows_amd64.s index 93ab960b06..045f64eb46 100644 --- a/src/runtime/time_windows_amd64.s +++ b/src/runtime/time_windows_amd64.s @@ -12,27 +12,14 @@ TEXT time·now(SB),NOSPLIT,$0-24 CMPB runtime·useQPCTime(SB), $0 JNE useQPC + MOVQ $_INTERRUPT_TIME, DI -loop: - MOVL time_hi1(DI), AX - MOVL time_lo(DI), BX - MOVL time_hi2(DI), CX - CMPL AX, CX - JNE loop - SHLQ $32, AX - ORQ BX, AX + MOVQ time_lo(DI), AX IMULQ $100, AX MOVQ AX, mono+16(FP) MOVQ $_SYSTEM_TIME, DI -wall: - MOVL time_hi1(DI), AX - MOVL time_lo(DI), BX - MOVL time_hi2(DI), CX - CMPL AX, CX - JNE wall - SHLQ $32, AX - ORQ BX, AX + MOVQ time_lo(DI), AX MOVQ $116444736000000000, DI SUBQ DI, AX IMULQ $100, AX diff --git a/src/runtime/time_windows_arm.s b/src/runtime/time_windows_arm.s index 7c763b66ed..6552d75ff1 100644 --- a/src/runtime/time_windows_arm.s +++ b/src/runtime/time_windows_arm.s @@ -17,7 +17,9 @@ TEXT time·now(SB),NOSPLIT|NOFRAME,$0-20 MOVW $_INTERRUPT_TIME, R3 loop: MOVW time_hi1(R3), R1 + DMB MB_ISH MOVW time_lo(R3), R0 + DMB MB_ISH MOVW time_hi2(R3), R2 CMP R1, R2 BNE loop @@ -34,7 +36,9 @@ loop: MOVW $_SYSTEM_TIME, R3 wall: MOVW time_hi1(R3), R1 + DMB MB_ISH MOVW time_lo(R3), R0 + DMB MB_ISH MOVW time_hi2(R3), R2 CMP R1, R2 BNE wall diff --git a/src/runtime/time_windows_arm64.s b/src/runtime/time_windows_arm64.s index ef52ce4c99..e8a0eb2f93 100644 --- a/src/runtime/time_windows_arm64.s +++ b/src/runtime/time_windows_arm64.s @@ -13,34 +13,18 @@ TEXT time·now(SB),NOSPLIT|NOFRAME,$0-24 MOVB runtime·useQPCTime(SB), R0 CMP $0, R0 BNE useQPC - MOVD $_INTERRUPT_TIME, R3 -loop: - MOVWU time_hi1(R3), R1 - MOVWU time_lo(R3), R0 - MOVWU time_hi2(R3), R2 - CMP R1, R2 - BNE loop - // wintime = R1:R0, multiply by 100 - ORR R1<<32, R0 + MOVD $_INTERRUPT_TIME, R3 + MOVD time_lo(R3), R0 MOVD $100, R1 MUL R1, R0 MOVD R0, mono+16(FP) MOVD $_SYSTEM_TIME, R3 -wall: - MOVWU time_hi1(R3), R1 - MOVWU time_lo(R3), R0 - MOVWU time_hi2(R3), R2 - CMP R1, R2 - BNE wall - - // w = R1:R0 in 100ns units + MOVD time_lo(R3), R0 // convert to Unix epoch (but still 100ns units) #define delta 116444736000000000 - ORR R1<<32, R0 SUB $delta, R0 - // Convert to nSec MOVD $100, R1 MUL R1, R0 -- 2.48.1