]> Cypherpunks repositories - gostls13.git/commitdiff
internal/bytealg: improve compare on Power10/PPC64
authorPaul Murphy <murp@ibm.com>
Thu, 17 Aug 2023 21:35:55 +0000 (16:35 -0500)
committerPaul Murphy <murp@ibm.com>
Mon, 28 Aug 2023 17:33:20 +0000 (17:33 +0000)
Handle comparisons of 15 or less bytes more efficiently
with Power10 instructions when building with GOPPC64=power10.

name            old time/op  new time/op  delta
BytesCompare/1  2.53ns ± 0%  2.17ns ± 0%  -14.17%
BytesCompare/2  2.70ns ± 0%  2.17ns ± 0%  -19.77%
BytesCompare/4  2.59ns ± 0%  2.17ns ± 0%  -16.20%
BytesCompare/8  2.66ns ± 0%  2.17ns ± 0%  -18.63%

Change-Id: I6d7c6af0a58ea3e03acc3930c54b77f2ac1dfbd5
Reviewed-on: https://go-review.googlesource.com/c/go/+/522315
Reviewed-by: Joedian Reid <joedian@golang.org>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/internal/bytealg/compare_ppc64x.s

index 63c33ee635b59e7d9acdf23d224b9cc50ec0c51d..2629251e430a8eea9df6a4370d0843bd6b5827c5 100644 (file)
@@ -274,7 +274,16 @@ lower:
        RET
 
        PCALIGN $16
-cmp8:  // 8 - 15B
+cmp8:  // 8 - 15B (0 - 15B if GOPPC64_power10)
+#ifdef GOPPC64_power10
+       SLD     $56,R9,R9
+       LXVLL   R5,R9,V3        // Load bytes starting from MSB to LSB, unused are zero filled.
+       LXVLL   R6,R9,V4
+       VCMPUQ  V3,V4,CR0       // Compare as a 128b integer.
+       SETB_CR0(R6)
+       ISEL    CR0EQ,R3,R6,R3  // If equal, length determines the return value.
+       RET
+#else
        CMP     R9,$8
        BLT     cmp4
        ANDCC   $7,R9,R9
@@ -330,3 +339,4 @@ cmp0:
        SETB_CR0(R6)
        ISEL    CR0EQ,R3,R6,R3
        RET
+#endif