From 73e1affef3dbaee967ac5ebf1cb5736ec7067011 Mon Sep 17 00:00:00 2001 From: Archana R Date: Wed, 11 Jan 2023 11:45:28 -0600 Subject: [PATCH] [release-branch.go1.19] runtime: fix performance regression in morestack_noctxt on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In the fix for 54332 the MOVD R1, R1 instruction was added to morestack_noctxt function to set the SPWRITE bit. However, the instruction MOVD R1, R1 results in or r1,r1,r1 which is a special instruction on ppc64 architecture as it changes the thread priority and can negatively impact performance in some cases. More details on such similar nops can be found in Power ISA v3.1 Book II on Power ISA Virtual Environment architecture in the chapter on Program Priority Registers and Or instructions. Replacing this by OR R0, R1 has the same affect on setting SPWRITE as needed by the first fix but does not affect thread priority and hence does not cause the degradation in performance Hash65536-64 2.81GB/s ±10% 16.69GB/s ± 0% +494.44% Fixes #57812 Change-Id: Ib912e3716c6afd277994d6c1c5b2891f82225d50 Reviewed-on: https://go-review.googlesource.com/c/go/+/461597 Reviewed-by: Benny Siegert Reviewed-by: Lynn Boger Auto-Submit: Benny Siegert Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Run-TryBot: Lynn Boger (cherry picked from commit 1c65b69bd1dbc930c6246877f6c21c81f2a60d55) Reviewed-on: https://go-review.googlesource.com/c/go/+/462335 Run-TryBot: Archana Ravindar Reviewed-by: Dmitri Shuralyov --- src/runtime/asm_ppc64x.s | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 6a162eff0a..61ff17a934 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -339,8 +339,11 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 // the caller doesn't save LR on stack but passes it as a // register (R5), and the unwinder currently doesn't understand. // Make it SPWRITE to stop unwinding. (See issue 54332) - MOVD R1, R1 + // Use OR R0, R1 instead of MOVD R1, R1 as the MOVD instruction + // has a special affect on Power8,9,10 by lowering the thread + // priority and causing a slowdown in execution time + OR R0, R1 MOVD R0, R11 BR runtime·morestack(SB) -- 2.50.0