Cypherpunks repositories - gostls13.git/commitdiff
internal/bytealg: optimize Equal for arm64 target
author Vasily Leonenko <vasiliy.leonenko@gmail.com>
Tue, 23 Jul 2024 20:23:33 +0000 (23:23 +0300)
committer Gopher Robot <gobot@golang.org>
Tue, 6 Aug 2024 15:36:27 +0000 (15:36 +0000)
Remove redundant intermediate jump in runtime.memequal
Remove redundant a.ptr==b.ptr check in runtime.memequal_varlen
Add 16-bytes alignment before some labels in runtime.memequal

goos: linux
goarch: arm64
pkg: bytes
                                │ ./master.log │              ./opt.log              │
                                │    sec/op    │    sec/op     vs base               │
Equal/0-4                         0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                     2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                     2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                     2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                    2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                    2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                    2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                    2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                    2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                    2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                   2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                          6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                          8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                          8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                         8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                         9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                         11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                        10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                         192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                         191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                        3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4          7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4          7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4          7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4          7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4        193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4        223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4        191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4        223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4     191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4     225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4     192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4     225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4    3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4    3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4    3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4    3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                            199.9n         190.5n        -4.70%

Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>

src/internal/bytealg/equal_arm64.s

index 4db951547443d57ae736980eaf3d3729fe072df5..408ab374e629943d5acfb5423fa46be73cf258bb 100644 (file)
@@ -5,25 +5,11 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
-       // short path to handle 0-byte case
-       CBZ     R2, equal
-       // short path to handle equal pointers
-       CMP     R0, R1
-       BEQ     equal
-       B       memeqbody<>(SB)
-equal:
-       MOVD    $1, R0
-       RET
-
 // memequal_varlen(a, b unsafe.Pointer) bool
 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
-       CMP     R0, R1
-       BEQ     eq
        MOVD    8(R26), R2    // compiler stores size at offset 8 in the closure
        CBZ     R2, eq
-       B       memeqbody<>(SB)
+       B       runtime·memequal<ABIInternal>(SB)
 eq:
        MOVD    $1, R0
        RET
@@ -33,7 +19,13 @@ eq:
 // R1: pointer b
 // R2: data len
 // at return: result in R0
-TEXT memeqbody<>(SB),NOSPLIT,$0
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
+       // short path to handle 0-byte case
+       CBZ     R2, equal
+       // short path to handle equal pointers
+       CMP     R0, R1
+       BEQ     equal
        CMP     $1, R2
        // handle 1-byte special case for better performance
        BEQ     one
@@ -91,6 +83,7 @@ tail:
        EOR     R4, R5
        CBNZ    R5, not_equal
        B       equal
+       PCALIGN $16
 lt_8:
        TBZ     $2, R2, lt_4
        MOVWU   (R0), R4
@@ -103,6 +96,7 @@ lt_8:
        EOR     R4, R5
        CBNZ    R5, not_equal
        B       equal
+       PCALIGN $16
 lt_4:
        TBZ     $1, R2, lt_2
        MOVHU.P 2(R0), R4