From: Paul Murphy Date: Thu, 17 Aug 2023 21:35:55 +0000 (-0500) Subject: internal/bytealg: improve compare on Power10/PPC64 X-Git-Tag: go1.22rc1~1067 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b2e809bab59a692aa6a69e1bd1d32eeeab4622e3;p=gostls13.git internal/bytealg: improve compare on Power10/PPC64 Handle comparisons of 15 or less bytes more efficiently with Power10 instructions when building with GOPPC64=power10. name old time/op new time/op delta BytesCompare/1 2.53ns ± 0% 2.17ns ± 0% -14.17% BytesCompare/2 2.70ns ± 0% 2.17ns ± 0% -19.77% BytesCompare/4 2.59ns ± 0% 2.17ns ± 0% -16.20% BytesCompare/8 2.66ns ± 0% 2.17ns ± 0% -18.63% Change-Id: I6d7c6af0a58ea3e03acc3930c54b77f2ac1dfbd5 Reviewed-on: https://go-review.googlesource.com/c/go/+/522315 Reviewed-by: Joedian Reid Run-TryBot: Paul Murphy Reviewed-by: Lynn Boger Reviewed-by: Bryan Mills TryBot-Result: Gopher Robot --- diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s index 63c33ee635..2629251e43 100644 --- a/src/internal/bytealg/compare_ppc64x.s +++ b/src/internal/bytealg/compare_ppc64x.s @@ -274,7 +274,16 @@ lower: RET PCALIGN $16 -cmp8: // 8 - 15B +cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10) +#ifdef GOPPC64_power10 + SLD $56,R9,R9 + LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled. + LXVLL R6,R9,V4 + VCMPUQ V3,V4,CR0 // Compare as a 128b integer. + SETB_CR0(R6) + ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value. + RET +#else CMP R9,$8 BLT cmp4 ANDCC $7,R9,R9 @@ -330,3 +339,4 @@ cmp0: SETB_CR0(R6) ISEL CR0EQ,R3,R6,R3 RET +#endif