From d09947522d1fbe17d6a8aece07d70e31a3a5311e Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 27 Apr 2021 16:46:53 -0700 Subject: [PATCH] runtime: implement time.now in assembly for linux-amd64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta AfterFunc-12 66.7µs ± 3% 66.8µs ± 4% ~ (p=0.836 n=20+20) After-12 99.4µs ± 4% 98.1µs ± 3% -1.31% (p=0.013 n=20+20) Stop-12 66.1µs ±12% 65.4µs ±10% ~ (p=0.602 n=20+20) SimultaneousAfterFunc-12 110µs ± 1% 114µs ± 2% +3.98% (p=0.000 n=19+18) StartStop-12 32.1µs ±15% 32.2µs ±13% ~ (p=0.620 n=20+20) Reset-12 3.66µs ± 2% 3.63µs ± 5% -0.92% (p=0.018 n=20+17) Sleep-12 134µs ± 1% 139µs ± 4% +3.97% (p=0.000 n=20+18) Ticker-12 32.8µs ± 1% 32.6µs ± 2% -0.63% (p=0.017 n=18+20) TickerReset-12 3.72µs ± 3% 3.71µs ± 3% ~ (p=0.753 n=20+20) TickerResetNaive-12 68.9µs ±11% 65.8µs ± 8% -4.44% (p=0.008 n=20+20) Now-12 33.3ns ± 1% 29.6ns ± 0% -11.06% (p=0.000 n=18+16) NowUnixNano-12 34.6ns ± 0% 31.2ns ± 0% -9.94% (p=0.000 n=20+17) NowUnixMilli-12 35.0ns ± 1% 30.9ns ± 0% -11.75% (p=0.000 n=19+15) NowUnixMicro-12 35.0ns ± 0% 30.9ns ± 0% -11.85% (p=0.000 n=20+19) Format-12 302ns ± 3% 306ns ± 3% +1.22% (p=0.009 n=20+20) FormatNow-12 184ns ± 5% 187ns ± 2% +1.25% (p=0.046 n=20+19) MarshalJSON-12 262ns ± 2% 270ns ± 3% +2.99% (p=0.000 n=20+20) MarshalText-12 262ns ± 3% 268ns ± 3% +2.37% (p=0.000 n=19+19) Parse-12 145ns ± 1% 148ns ± 0% +2.27% (p=0.000 n=18+19) ParseDuration-12 82.3ns ± 1% 79.7ns ± 1% -3.06% (p=0.000 n=20+20) Hour-12 4.48ns ± 1% 4.42ns ± 1% -1.32% (p=0.000 n=19+19) Second-12 4.44ns ± 1% 4.42ns ± 1% -0.39% (p=0.000 n=20+18) Year-12 11.2ns ± 1% 11.1ns ± 1% ~ (p=0.193 n=20+20) Day-12 14.8ns ± 0% 14.8ns ± 1% ~ (p=0.873 n=19+20) ISOWeek-12 17.2ns ± 0% 17.2ns ± 0% ~ (p=0.605 n=18+20) name old avg-late-ns new avg-late-ns delta ParallelTimerLatency-12 375k ± 3% 377k ± 3% ~ (p=0.445 n=20+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12 136k ± 2% 137k ± 2% ~ (p=0.242 n=20+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12 97.4k ±11% 96.4k ±10% ~ (p=0.336 n=19+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12 74.8k ± 3% 74.2k ± 3% ~ (p=0.158 n=20+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12 70.7k ± 7% 70.4k ± 6% ~ (p=0.879 n=20+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12 65.8k ± 9% 66.3k ±14% ~ (p=0.594 n=17+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12 55.5k ±29% 56.7k ±30% ~ (p=0.758 n=20+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12 45.3k ±29% 43.6k ±33% ~ (p=0.212 n=19+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12 64.7k ±46% 65.2k ±78% ~ (p=0.480 n=18+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12 147k ±88% 119k ±83% ~ (p=0.092 n=19+18) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12 2.63M ±29% 2.70M ±59% ~ (p=0.989 n=19+20) StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12 81.4k ± 4% 80.2k ± 3% -1.55% (p=0.009 n=17+18) name old max-late-ns new max-late-ns delta ParallelTimerLatency-12 7.66M ±102% 6.98M ±131% ~ (p=0.445 n=20+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12 381k ±12% 382k ±17% ~ (p=0.901 n=17+16) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12 388k ±69% 356k ±10% ~ (p=0.363 n=17+16) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12 350k ±17% 347k ±25% ~ (p=0.538 n=19+18) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12 378k ±52% 341k ±30% ~ (p=0.153 n=18+17) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12 392k ±54% 410k ±78% ~ (p=0.730 n=18+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12 467k ±80% 527k ±129% ~ (p=0.616 n=17+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12 915k ±138% 1023k ±227% ~ (p=0.696 n=20+18) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12 1.84M ±155% 1.74M ±158% ~ (p=0.893 n=18+19) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12 6.95M ±92% 7.66M ±91% ~ (p=0.687 n=19+20) StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12 18.6M ±22% 16.2M ±28% -12.78% (p=0.003 n=19+19) StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12 1.01M ± 8% 1.04M ±10% ~ (p=0.111 n=19+18) Change-Id: I96aa2e0206a6e9286bcbfc8be372e84608ed4e2f Reviewed-on: https://go-review.googlesource.com/c/go/+/314277 Trust: Ian Lance Taylor Reviewed-by: Michael Knyszek --- src/runtime/time_linux_amd64.s | 97 ++++++++++++++++++++++++++++++++++ src/runtime/timeasm.go | 5 +- src/runtime/timestub.go | 6 ++- 3 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 src/runtime/time_linux_amd64.s diff --git a/src/runtime/time_linux_amd64.s b/src/runtime/time_linux_amd64.s new file mode 100644 index 0000000000..0dd7919896 --- /dev/null +++ b/src/runtime/time_linux_amd64.s @@ -0,0 +1,97 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !faketime +// +build !faketime + +#include "go_asm.h" +#include "go_tls.h" +#include "textflag.h" + +#define SYS_clock_gettime 228 + +// func time.now() (sec int64, nsec int32, mono int64) +TEXT time·now(SB),NOSPLIT,$16-24 + MOVQ SP, R12 // Save old SP; R12 unchanged by C code. + +#ifdef GOEXPERIMENT_regabig + MOVQ g_m(R14), BX // BX unchanged by C code. +#else + get_tls(CX) + MOVQ g(CX), AX + MOVQ g_m(AX), BX // BX unchanged by C code. +#endif + + // Store CLOCK_REALTIME results directly to return space. + LEAQ sec+0(FP), SI + + // Set vdsoPC and vdsoSP for SIGPROF traceback. + // Save the old values on stack and restore them on exit, + // so this function is reentrant. + MOVQ m_vdsoPC(BX), CX + MOVQ m_vdsoSP(BX), DX + MOVQ CX, 0(SP) + MOVQ DX, 8(SP) + + MOVQ -8(SI), CX // Sets CX to function return address. + MOVQ CX, m_vdsoPC(BX) + MOVQ SI, m_vdsoSP(BX) + +#ifdef GOEXPERIMENT_regabig + CMPQ R14, m_curg(BX) // Only switch if on curg. +#else + CMPQ AX, m_curg(BX) // Only switch if on curg. +#endif + JNE noswitch + + MOVQ m_g0(BX), DX + MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack + +noswitch: + SUBQ $16, SP // Space for monotonic time results + ANDQ $~15, SP // Align for C code + + MOVL $0, DI // CLOCK_REALTIME + MOVQ runtime·vdsoClockgettimeSym(SB), AX + CMPQ AX, $0 + JEQ fallback + CALL AX + + MOVL $1, DI // CLOCK_MONOTONIC + LEAQ 0(SP), SI + MOVQ runtime·vdsoClockgettimeSym(SB), AX + CALL AX + +ret: + MOVQ 0(SP), AX // sec + MOVQ 8(SP), DX // nsec + + MOVQ R12, SP // Restore real SP + // Restore vdsoPC, vdsoSP + // We don't worry about being signaled between the two stores. + // If we are not in a signal handler, we'll restore vdsoSP to 0, + // and no one will care about vdsoPC. If we are in a signal handler, + // we cannot receive another signal. + MOVQ 8(SP), CX + MOVQ CX, m_vdsoSP(BX) + MOVQ 0(SP), CX + MOVQ CX, m_vdsoPC(BX) + + // sec is in AX, nsec in DX + // return nsec in AX + IMULQ $1000000000, AX + ADDQ DX, AX + MOVQ AX, mono+16(FP) + RET + +fallback: + MOVQ $SYS_clock_gettime, AX + SYSCALL + + MOVL $1, DI // CLOCK_MONOTONIC + LEAQ 0(SP), SI + MOVQ $SYS_clock_gettime, AX + SYSCALL + + JMP ret diff --git a/src/runtime/timeasm.go b/src/runtime/timeasm.go index f0c09461bd..468ff8a0d3 100644 --- a/src/runtime/timeasm.go +++ b/src/runtime/timeasm.go @@ -4,8 +4,9 @@ // Declarations for operating systems implementing time.now directly in assembly. -//go:build !faketime && windows -// +build !faketime,windows +//go:build !faketime && (windows || (linux && amd64)) +// +build !faketime +// +build windows linux,amd64 package runtime diff --git a/src/runtime/timestub.go b/src/runtime/timestub.go index a3d9d58286..6f16c70b81 100644 --- a/src/runtime/timestub.go +++ b/src/runtime/timestub.go @@ -5,8 +5,10 @@ // Declarations for operating systems implementing time.now // indirectly, in terms of walltime and nanotime assembly. -//go:build !faketime && !windows -// +build !faketime,!windows +//go:build !faketime && !windows && !(linux && amd64) +// +build !faketime +// +build !windows +// +build !linux !amd64 package runtime -- 2.50.0