Cypherpunks repositories - gostls13.git/commit
internal/bytealg: optimize Equal for arm64 target
author Vasily Leonenko <vasiliy.leonenko@gmail.com>
Tue, 23 Jul 2024 20:23:33 +0000 (23:23 +0300)
committer Gopher Robot <gobot@golang.org>
Tue, 6 Aug 2024 15:36:27 +0000 (15:36 +0000)
commit b915399e7e91cba31d4031df507b9c97c33f35d8
tree 96c6158e2d0535f9ef1baa05937ee0cac8b78d40
parent 1f0c044d60211e435dc58844127544dd3ecb6a41
internal/bytealg: optimize Equal for arm64 target

Remove redundant intermediate jump in runtime.memequal
Remove redundant a.ptr==b.ptr check in runtime.memequal_varlen
Add 16-byte alignment before some labels in runtime.memequal

goos: linux
goarch: arm64
pkg: bytes
                                │ ./master.log │              ./opt.log              │
                                │    sec/op    │    sec/op     vs base               │
Equal/0-4                         0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                     2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                     2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                     2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                    2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                    2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                    2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                    2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                    2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                    2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                   2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                          6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                          8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                          8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                         8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                         9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                         11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                        10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                         192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                         191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                        3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4          7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4          7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4          7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4          7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4        193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4        223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4        191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4        223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4     191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4     225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4     192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4     225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4    3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4    3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4    3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4    3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                            199.9n         190.5n        -4.70%

Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
src/internal/bytealg/equal_arm64.s