From b2e809bab59a692aa6a69e1bd1d32eeeab4622e3 Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Thu, 17 Aug 2023 16:35:55 -0500 Subject: [PATCH] internal/bytealg: improve compare on Power10/PPC64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Handle comparisons of 15 or less bytes more efficiently with Power10 instructions when building with GOPPC64=power10. name old time/op new time/op delta BytesCompare/1 2.53ns ± 0% 2.17ns ± 0% -14.17% BytesCompare/2 2.70ns ± 0% 2.17ns ± 0% -19.77% BytesCompare/4 2.59ns ± 0% 2.17ns ± 0% -16.20% BytesCompare/8 2.66ns ± 0% 2.17ns ± 0% -18.63% Change-Id: I6d7c6af0a58ea3e03acc3930c54b77f2ac1dfbd5 Reviewed-on: https://go-review.googlesource.com/c/go/+/522315 Reviewed-by: Joedian Reid Run-TryBot: Paul Murphy Reviewed-by: Lynn Boger Reviewed-by: Bryan Mills TryBot-Result: Gopher Robot --- src/internal/bytealg/compare_ppc64x.s | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s index 63c33ee635..2629251e43 100644 --- a/src/internal/bytealg/compare_ppc64x.s +++ b/src/internal/bytealg/compare_ppc64x.s @@ -274,7 +274,16 @@ lower: RET PCALIGN $16 -cmp8: // 8 - 15B +cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10) +#ifdef GOPPC64_power10 + SLD $56,R9,R9 + LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled. + LXVLL R6,R9,V4 + VCMPUQ V3,V4,CR0 // Compare as a 128b integer. + SETB_CR0(R6) + ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value. + RET +#else CMP R9,$8 BLT cmp4 ANDCC $7,R9,R9 @@ -330,3 +339,4 @@ cmp0: SETB_CR0(R6) ISEL CR0EQ,R3,R6,R3 RET +#endif -- 2.50.0