From 20ea98842159d7cde30062711527495ac00bcacd Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Mon, 20 Feb 2023 07:10:37 +0100 Subject: [PATCH] math: huge argument handling for sin/cos in s390x MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently s390x, sin/cos assembly implementation not handling huge arguments. This change reverts assembly routine to native go implementation for huge arguments. Implementing the changes in assembly giving better performance than native go changes in terms of execution/cycles. name Go_changes Asm_changes Sin/input_size_(0.5)-8 11.85ns ± 0% 5.32ns ± 1% Sin/input_size_(1<<20)-8 15.32ns ± 0% 9.75ns ± 3% Sin/input_size_(1<<_40)-8 17.9ns ± 0% 10.3ns ± 6% Sin/input_size_(1<<50)-8 16.33ns ± 0% 9.75ns ± 6% Sin/input_size_(1<<60)-8 33.0ns ± 1% 29.1ns ± 0% Sin/input_size_(1<<80)-8 29.9ns ± 0% 27.2ns ± 2% Sin/input_size_(1<<200)-8 31.5ns ± 1% 28.3ns ± 0% Sin/input_size_(1<<480)-8 29.4ns ± 1% 28.0ns ± 1% Sin/input_size_(1234567891234567_<<_180)-8 29.3ns ± 1% 28.0ns ± 0% Cos/input_size_(0.5)-8 10.33ns ± 0% 5.69ns ± 1% Cos/input_size_(1<<20)-8 16.67ns ± 0% 9.18ns ± 0% Cos/input_size_(1<<_40)-8 18.50ns ± 0% 9.45ns ± 3% Cos/input_size_(1<<50)-8 16.67ns ± 0% 9.18ns ± 1% Cos/input_size_(1<<60)-8 31.6ns ± 1% 26.7ns ± 2% Cos/input_size_(1<<80)-8 31.3ns ± 0% 25.5ns ± 1% Cos/input_size_(1<<200)-8 30.0ns ± 0% 26.7ns ± 1% Cos/input_size_(1<<480)-8 31.9ns ±2% 27.0ns ± 0% Cos/input_size_(1234567891234567_<<_180)-8 31.8ns ± 0% 26.9ns ± 0% Fixes #29240 Change-Id: Id2ebcfa113926f27510d527e80daaddad925a707 Reviewed-on: https://go-review.googlesource.com/c/go/+/469635 TryBot-Result: Gopher Robot Reviewed-by: Bill O'Farrell Reviewed-by: Cherry Mui Reviewed-by: Heschi Kreinick Run-TryBot: Ian Lance Taylor Reviewed-by: Keith Randall Reviewed-by: Keith Randall --- src/math/sin_s390x.s | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/math/sin_s390x.s b/src/math/sin_s390x.s index 7eb2206ca9..79d564b938 100644 --- a/src/math/sin_s390x.s +++ b/src/math/sin_s390x.s @@ -73,6 +73,10 @@ TEXT ·sinAsm(SB),NOSPLIT,$0-16 BLTU L17 FMOVD F0, F5 L2: + MOVD $sincosxlim<>+0(SB), R1 + FMOVD 0(R1), F1 + FCMPU F5, F1 + BGT L16 MOVD $sincoss7<>+0(SB), R1 FMOVD 0(R1), F4 MOVD $sincoss6<>+0(SB), R1 @@ -205,6 +209,8 @@ L15: RET +L16: + BR ·sin(SB) //tail call sinIsZero: FMOVD F0, ret+8(FP) RET @@ -223,6 +229,10 @@ TEXT ·cosAsm(SB),NOSPLIT,$0-16 BLTU L35 FMOVD F0, F1 L21: + MOVD $sincosxlim<>+0(SB), R1 + FMOVD 0(R1), F2 + FCMPU F1, F2 + BGT L30 MOVD $sincosc7<>+0(SB), R1 FMOVD 0(R1), F4 MOVD $sincosc6<>+0(SB), R1 @@ -354,3 +364,6 @@ L34: FNEG F0, F0 FMOVD F0, ret+8(FP) RET + +L30: + BR ·cos(SB) //tail call -- 2.48.1