From: Lynn Boger Date: Tue, 11 Sep 2018 15:42:15 +0000 (-0400) Subject: runtime: improve CALLFN macro for ppc64x X-Git-Tag: go1.12beta1~1012 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e945623930d2c85b9a81476203451ca8f4092875;p=gostls13.git runtime: improve CALLFN macro for ppc64x The previous CALLFN macro was copying a single byte at a time which is extremely inefficient on ppc64x. This changes the macro so it copies 8 bytes at a time. benchmark in reflect: name old time/op new time/op delta Call-8 177ns ± 0% 165ns ± 0% -6.78% (p=1.000 n=1+1) CallArgCopy/size=128-8 194ns ± 0% 140ns ± 0% -27.84% (p=1.000 n=1+1) CallArgCopy/size=256-8 253ns ± 0% 159ns ± 0% -37.15% (p=1.000 n=1+1) CallArgCopy/size=1024-8 612ns ± 0% 222ns ± 0% -63.73% (p=1.000 n=1+1) CallArgCopy/size=4096-8 2.14µs ± 0% 0.53µs ± 0% -75.01% (p=1.000 n=1+1) CallArgCopy/size=65536-8 33.0µs ± 0% 7.3µs ± 0% -77.72% (p=1.000 n=1+1) Change-Id: I71f6ee788264e61bb072264d21b77b83592c9dca Reviewed-on: https://go-review.googlesource.com/134635 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Carlos Eduardo Seo Reviewed-by: Michael Munday --- diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 57877c0194..b180cb06ab 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -390,15 +390,36 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ /* copy arguments to stack */ \ MOVD arg+16(FP), R3; \ MOVWZ argsize+24(FP), R4; \ - MOVD R1, R5; \ - ADD $(FIXED_FRAME-1), R5; \ - SUB $1, R3; \ - ADD R5, R4; \ - CMP R5, R4; \ - BEQ 4(PC); \ - MOVBZU 1(R3), R6; \ - MOVBZU R6, 1(R5); \ - BR -4(PC); \ + MOVD R1, R5; \ + CMP R4, $8; \ + BLT tailsetup; \ + /* copy 8 at a time if possible */ \ + ADD $(FIXED_FRAME-8), R5; \ + SUB $8, R3; \ +top: \ + MOVDU 8(R3), R7; \ + MOVDU R7, 8(R5); \ + SUB $8, R4; \ + CMP R4, $8; \ + BGE top; \ + /* handle remaining bytes */ \ + CMP $0, R4; \ + BEQ callfn; \ + ADD $7, R3; \ + ADD $7, R5; \ + BR tail; \ +tailsetup: \ + CMP $0, R4; \ + BEQ callfn; \ + ADD $(FIXED_FRAME-1), R5; \ + SUB $1, R3; \ +tail: \ + MOVBU 1(R3), R6; \ + MOVBU R6, 1(R5); \ + SUB $1, R4; \ + CMP $0, R4; \ + BGT tail; \ +callfn: \ /* call function */ \ MOVD f+8(FP), R11; \ MOVD (R11), R12; \