Cypherpunks repositories - gostls13.git/commit
cmd/compile: optimize shifts of int32 and uint32 on loong64
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 1.100n ± 1% 1.101n ± 0% ~ (p=0.566 n=10)
LeadingZeros8 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros16 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros32 1.2010n ± 0% 0.9511n ± 0% -20.81% (p=0.000 n=10)
LeadingZeros64 1.104n ± 1% 1.119n ± 0% +1.40% (p=0.000 n=10)
TrailingZeros 0.8137n ± 0% 0.8086n ± 0% -0.63% (p=0.001 n=10)
TrailingZeros8 1.031n ± 1% 1.031n ± 1% ~ (p=0.956 n=10)
TrailingZeros16 0.8204n ± 1% 0.8114n ± 0% -1.11% (p=0.000 n=10)
TrailingZeros32 0.8145n ± 0% 0.8090n ± 0% -0.68% (p=0.000 n=10)
TrailingZeros64 0.8159n ± 0% 0.8089n ± 1% -0.86% (p=0.000 n=10)
OnesCount 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount8 0.8005n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
OnesCount16 0.9339n ± 0% 0.9344n ± 0% +0.05% (p=0.000 n=10)
OnesCount32 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
RotateLeft 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
RotateLeft8 1.202n ± 0% 1.202n ± 0% ~ (p=0.210 n=10)
RotateLeft16 0.8050n ± 0% 0.8036n ± 0% -0.17% (p=0.002 n=10)
RotateLeft32 0.6674n ± 0% 0.6674n ± 0% ~ (p=1.000 n=10)
RotateLeft64 0.6673n ± 0% 0.6674n ± 0% ~ (p=0.072 n=10)
Reverse 0.4123n ± 0% 0.4067n ± 1% -1.37% (p=0.000 n=10)
Reverse8 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Reverse16 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse32 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse64 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.001 n=10)
ReverseBytes 0.4100n ± 1% 0.4057n ± 1% -1.06% (p=0.002 n=10)
ReverseBytes16 0.8004n ± 0% 0.8009n ± 0% +0.07% (p=0.000 n=10)
ReverseBytes32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
ReverseBytes64 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Add64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add64multiple 1.831n ± 0% 1.832n ± 0% ~ (p=1.000 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.601n ± 0% 1.602n ± 0% +0.06% (p=0.000 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Sub64multiple 2.400n ± 0% 2.402n ± 0% +0.10% (p=0.000 n=10)
Mul 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul64 0.8004n ± 0% 0.8008n ± 0% +0.05% (p=0.000 n=10)
Div 9.107n ± 0% 9.083n ± 0% ~ (p=0.255 n=10)
Div32 4.009n ± 0% 4.011n ± 0% +0.05% (p=0.000 n=10)
Div64 9.705n ± 0% 9.711n ± 0% +0.06% (p=0.000 n=10)
geomean 1.089n 1.083n -0.62%
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 1.352n ± 0% 1.341n ± 4% -0.81% (p=0.024 n=10)
LeadingZeros8 1.766n ± 0% 1.781n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros16 1.766n ± 0% 1.782n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros32 1.536n ± 0% 1.341n ± 1% -12.73% (p=0.000 n=10)
LeadingZeros64 1.351n ± 1% 1.338n ± 0% -0.96% (p=0.000 n=10)
TrailingZeros 0.9037n ± 0% 0.9025n ± 0% -0.12% (p=0.020 n=10)
TrailingZeros8 1.087n ± 3% 1.056n ± 0% ~ (p=0.060 n=10)
TrailingZeros16 1.101n ± 0% 1.101n ± 0% ~ (p=0.211 n=10)
TrailingZeros32 0.9040n ± 0% 0.9024n ± 1% -0.18% (p=0.017 n=10)
TrailingZeros64 0.9043n ± 0% 0.9028n ± 1% ~ (p=0.118 n=10)
OnesCount 1.503n ± 2% 1.482n ± 1% -1.43% (p=0.001 n=10)
OnesCount8 1.207n ± 0% 1.206n ± 0% -0.12% (p=0.000 n=10)
OnesCount16 1.501n ± 0% 1.534n ± 0% +2.13% (p=0.000 n=10)
OnesCount32 1.483n ± 1% 1.531n ± 1% +3.27% (p=0.000 n=10)
OnesCount64 1.301n ± 0% 1.302n ± 0% +0.08% (p=0.000 n=10)
RotateLeft 0.8136n ± 4% 0.8083n ± 0% -0.66% (p=0.002 n=10)
RotateLeft8 1.311n ± 0% 1.310n ± 0% ~ (p=0.786 n=10)
RotateLeft16 1.165n ± 0% 1.149n ± 0% -1.33% (p=0.001 n=10)
RotateLeft32 0.8138n ± 1% 0.8093n ± 0% -0.57% (p=0.017 n=10)
RotateLeft64 0.8149n ± 1% 0.8088n ± 0% -0.74% (p=0.000 n=10)
Reverse 0.5195n ± 1% 0.5109n ± 0% -1.67% (p=0.000 n=10)
Reverse8 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.012 n=10)
Reverse64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.010 n=10)
ReverseBytes 0.5120n ± 1% 0.5122n ± 2% ~ (p=0.306 n=10)
ReverseBytes16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 4% ~ (p=0.334 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.563 n=10)
Add64 1.201n ± 0% 1.201n ± 1% ~ (p=0.652 n=10)
Add64multiple 1.909n ± 0% 1.902n ± 0% ~ (p=0.126 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.655n ± 0% 1.654n ± 0% ~ (p=0.589 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub64multiple 2.150n ± 0% 2.180n ± 4% +1.37% (p=0.000 n=10)
Mul 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.011 n=10)
Mul32 1.053n ± 0% 1.030n ± 0% -2.23% (p=0.000 n=10)
Mul64 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.018 n=10)
Div 11.59n ± 0% 11.57n ± 1% ~ (p=0.091 n=10)
Div32 4.337n ± 0% 4.337n ± 1% ~ (p=0.783 n=10)
Div64 12.81n ± 0% 12.76n ± 0% -0.39% (p=0.001 n=10)
geomean 1.257n 1.252n -0.46%
Change-Id: I9e93ea49736760c19dc6b6463d2aa95878121b7b
Reviewed-on: https://go-review.googlesource.com/c/go/+/627855
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Junyang Shao <shaojunyang@google.com>