]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: implement time.now in assembly for linux-amd64
authorIan Lance Taylor <iant@golang.org>
Tue, 27 Apr 2021 23:46:53 +0000 (16:46 -0700)
committerIan Lance Taylor <iant@golang.org>
Thu, 29 Apr 2021 04:18:39 +0000 (04:18 +0000)
name                                                       old time/op      new time/op      delta
AfterFunc-12                                                   66.7µs ± 3%      66.8µs ± 4%     ~     (p=0.836 n=20+20)
After-12                                                       99.4µs ± 4%      98.1µs ± 3%   -1.31%  (p=0.013 n=20+20)
Stop-12                                                        66.1µs ±12%      65.4µs ±10%     ~     (p=0.602 n=20+20)
SimultaneousAfterFunc-12                                        110µs ± 1%       114µs ± 2%   +3.98%  (p=0.000 n=19+18)
StartStop-12                                                   32.1µs ±15%      32.2µs ±13%     ~     (p=0.620 n=20+20)
Reset-12                                                       3.66µs ± 2%      3.63µs ± 5%   -0.92%  (p=0.018 n=20+17)
Sleep-12                                                        134µs ± 1%       139µs ± 4%   +3.97%  (p=0.000 n=20+18)
Ticker-12                                                      32.8µs ± 1%      32.6µs ± 2%   -0.63%  (p=0.017 n=18+20)
TickerReset-12                                                 3.72µs ± 3%      3.71µs ± 3%     ~     (p=0.753 n=20+20)
TickerResetNaive-12                                            68.9µs ±11%      65.8µs ± 8%   -4.44%  (p=0.008 n=20+20)
Now-12                                                         33.3ns ± 1%      29.6ns ± 0%  -11.06%  (p=0.000 n=18+16)
NowUnixNano-12                                                 34.6ns ± 0%      31.2ns ± 0%   -9.94%  (p=0.000 n=20+17)
NowUnixMilli-12                                                35.0ns ± 1%      30.9ns ± 0%  -11.75%  (p=0.000 n=19+15)
NowUnixMicro-12                                                35.0ns ± 0%      30.9ns ± 0%  -11.85%  (p=0.000 n=20+19)
Format-12                                                       302ns ± 3%       306ns ± 3%   +1.22%  (p=0.009 n=20+20)
FormatNow-12                                                    184ns ± 5%       187ns ± 2%   +1.25%  (p=0.046 n=20+19)
MarshalJSON-12                                                  262ns ± 2%       270ns ± 3%   +2.99%  (p=0.000 n=20+20)
MarshalText-12                                                  262ns ± 3%       268ns ± 3%   +2.37%  (p=0.000 n=19+19)
Parse-12                                                        145ns ± 1%       148ns ± 0%   +2.27%  (p=0.000 n=18+19)
ParseDuration-12                                               82.3ns ± 1%      79.7ns ± 1%   -3.06%  (p=0.000 n=20+20)
Hour-12                                                        4.48ns ± 1%      4.42ns ± 1%   -1.32%  (p=0.000 n=19+19)
Second-12                                                      4.44ns ± 1%      4.42ns ± 1%   -0.39%  (p=0.000 n=20+18)
Year-12                                                        11.2ns ± 1%      11.1ns ± 1%     ~     (p=0.193 n=20+20)
Day-12                                                         14.8ns ± 0%      14.8ns ± 1%     ~     (p=0.873 n=19+20)
ISOWeek-12                                                     17.2ns ± 0%      17.2ns ± 0%     ~     (p=0.605 n=18+20)

name                                                       old avg-late-ns  new avg-late-ns  delta
ParallelTimerLatency-12                                          375k ± 3%        377k ± 3%     ~     (p=0.445 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         136k ± 2%        137k ± 2%     ~     (p=0.242 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12        97.4k ±11%       96.4k ±10%     ~     (p=0.336 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12        74.8k ± 3%       74.2k ± 3%     ~     (p=0.158 n=20+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12        70.7k ± 7%       70.4k ± 6%     ~     (p=0.879 n=20+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12        65.8k ± 9%       66.3k ±14%     ~     (p=0.594 n=17+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12        55.5k ±29%       56.7k ±30%     ~     (p=0.758 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        45.3k ±29%       43.6k ±33%     ~     (p=0.212 n=19+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12        64.7k ±46%       65.2k ±78%     ~     (p=0.480 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12         147k ±88%        119k ±83%     ~     (p=0.092 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12       2.63M ±29%       2.70M ±59%     ~     (p=0.989 n=19+20)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12          81.4k ± 4%       80.2k ± 3%   -1.55%  (p=0.009 n=17+18)

name                                                       old max-late-ns  new max-late-ns  delta
ParallelTimerLatency-12                                        7.66M ±102%      6.98M ±131%     ~     (p=0.445 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         381k ±12%        382k ±17%     ~     (p=0.901 n=17+16)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12         388k ±69%        356k ±10%     ~     (p=0.363 n=17+16)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12         350k ±17%        347k ±25%     ~     (p=0.538 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12         378k ±52%        341k ±30%     ~     (p=0.153 n=18+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12         392k ±54%        410k ±78%     ~     (p=0.730 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12         467k ±80%       527k ±129%     ~     (p=0.616 n=17+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        915k ±138%      1023k ±227%     ~     (p=0.696 n=20+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12       1.84M ±155%      1.74M ±158%     ~     (p=0.893 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12        6.95M ±92%       7.66M ±91%     ~     (p=0.687 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12       18.6M ±22%       16.2M ±28%  -12.78%  (p=0.003 n=19+19)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12          1.01M ± 8%       1.04M ±10%     ~     (p=0.111 n=19+18)

Change-Id: I96aa2e0206a6e9286bcbfc8be372e84608ed4e2f
Reviewed-on: https://go-review.googlesource.com/c/go/+/314277
Trust: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
src/runtime/time_linux_amd64.s [new file with mode: 0644]
src/runtime/timeasm.go
src/runtime/timestub.go

diff --git a/src/runtime/time_linux_amd64.s b/src/runtime/time_linux_amd64.s
new file mode 100644 (file)
index 0000000..0dd7919
--- /dev/null
@@ -0,0 +1,97 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !faketime
+// +build !faketime
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+#define SYS_clock_gettime      228
+
+// func time.now() (sec int64, nsec int32, mono int64)
+TEXT time·now(SB),NOSPLIT,$16-24
+       MOVQ    SP, R12 // Save old SP; R12 unchanged by C code.
+
+#ifdef GOEXPERIMENT_regabig
+       MOVQ    g_m(R14), BX // BX unchanged by C code.
+#else
+       get_tls(CX)
+       MOVQ    g(CX), AX
+       MOVQ    g_m(AX), BX // BX unchanged by C code.
+#endif
+
+       // Store CLOCK_REALTIME results directly to return space.
+       LEAQ    sec+0(FP), SI
+
+       // Set vdsoPC and vdsoSP for SIGPROF traceback.
+       // Save the old values on stack and restore them on exit,
+       // so this function is reentrant.
+       MOVQ    m_vdsoPC(BX), CX
+       MOVQ    m_vdsoSP(BX), DX
+       MOVQ    CX, 0(SP)
+       MOVQ    DX, 8(SP)
+
+       MOVQ    -8(SI), CX      // Sets CX to function return address.
+       MOVQ    CX, m_vdsoPC(BX)
+       MOVQ    SI, m_vdsoSP(BX)
+
+#ifdef GOEXPERIMENT_regabig
+       CMPQ    R14, m_curg(BX) // Only switch if on curg.
+#else
+       CMPQ    AX, m_curg(BX)  // Only switch if on curg.
+#endif
+       JNE     noswitch
+
+       MOVQ    m_g0(BX), DX
+       MOVQ    (g_sched+gobuf_sp)(DX), SP      // Set SP to g0 stack
+
+noswitch:
+       SUBQ    $16, SP         // Space for monotonic time results
+       ANDQ    $~15, SP        // Align for C code
+
+       MOVL    $0, DI // CLOCK_REALTIME
+       MOVQ    runtime·vdsoClockgettimeSym(SB), AX
+       CMPQ    AX, $0
+       JEQ     fallback
+       CALL    AX
+
+       MOVL    $1, DI // CLOCK_MONOTONIC
+       LEAQ    0(SP), SI
+       MOVQ    runtime·vdsoClockgettimeSym(SB), AX
+       CALL    AX
+
+ret:
+       MOVQ    0(SP), AX       // sec
+       MOVQ    8(SP), DX       // nsec
+
+       MOVQ    R12, SP         // Restore real SP
+       // Restore vdsoPC, vdsoSP
+       // We don't worry about being signaled between the two stores.
+       // If we are not in a signal handler, we'll restore vdsoSP to 0,
+       // and no one will care about vdsoPC. If we are in a signal handler,
+       // we cannot receive another signal.
+       MOVQ    8(SP), CX
+       MOVQ    CX, m_vdsoSP(BX)
+       MOVQ    0(SP), CX
+       MOVQ    CX, m_vdsoPC(BX)
+
+       // sec is in AX, nsec in DX
+       // return nsec in AX
+       IMULQ   $1000000000, AX
+       ADDQ    DX, AX
+       MOVQ    AX, mono+16(FP)
+       RET
+
+fallback:
+       MOVQ    $SYS_clock_gettime, AX
+       SYSCALL
+
+       MOVL    $1, DI // CLOCK_MONOTONIC
+       LEAQ    0(SP), SI
+       MOVQ    $SYS_clock_gettime, AX
+       SYSCALL
+
+       JMP     ret
index f0c09461bdc2b08c22448368e321363c7ce0fdd9..468ff8a0d33dcdbcdd2c0c6c37c23101e59fa8ca 100644 (file)
@@ -4,8 +4,9 @@
 
 // Declarations for operating systems implementing time.now directly in assembly.
 
-//go:build !faketime && windows
-// +build !faketime,windows
+//go:build !faketime && (windows || (linux && amd64))
+// +build !faketime
+// +build windows linux,amd64
 
 package runtime
 
index a3d9d5828655952bcec7959934e42ef64505cbde..6f16c70b8160b18a0d1131230914c6a63a5ea88a 100644 (file)
@@ -5,8 +5,10 @@
 // Declarations for operating systems implementing time.now
 // indirectly, in terms of walltime and nanotime assembly.
 
-//go:build !faketime && !windows
-// +build !faketime,!windows
+//go:build !faketime && !windows && !(linux && amd64)
+// +build !faketime
+// +build !windows
+// +build !linux !amd64
 
 package runtime