From 3cb1e9d98a98abed5fbdcf78a54956851310fe30 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 19 Mar 2019 14:54:40 +0100 Subject: [PATCH] internal/bytealg: add assembly implementation of Count/CountString on arm MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Simple single-byte loop count for now, to be further improved in future CLs. Benchmark on linux/arm: name old time/op new time/op delta CountSingle/10-4 122ns ± 0% 87ns ± 1% -28.41% (p=0.000 n=7+10) CountSingle/32-4 242ns ± 0% 174ns ± 1% -28.25% (p=0.000 n=10+10) CountSingle/4K-4 24.2µs ± 1% 15.6µs ± 1% -35.42% (p=0.000 n=10+10) CountSingle/4M-4 29.6ms ± 1% 21.3ms ± 1% -28.09% (p=0.000 n=10+9) CountSingle/64M-4 562ms ± 0% 414ms ± 1% -26.23% (p=0.000 n=8+10) name old speed new speed delta CountSingle/10-4 81.7MB/s ± 1% 114.5MB/s ± 1% +40.07% (p=0.000 n=10+10) CountSingle/32-4 132MB/s ± 0% 184MB/s ± 1% +39.39% (p=0.000 n=10+9) CountSingle/4K-4 170MB/s ± 1% 263MB/s ± 1% +54.86% (p=0.000 n=10+10) CountSingle/4M-4 142MB/s ± 1% 197MB/s ± 1% +39.07% (p=0.000 n=10+9) CountSingle/64M-4 119MB/s ± 0% 162MB/s ± 1% +35.55% (p=0.000 n=8+10) Updates #29001 Change-Id: I42a268215a62044286ec32b548d8e4b86b9570ee Reviewed-on: https://go-review.googlesource.com/c/go/+/168319 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall Reviewed-by: Cherry Zhang --- src/internal/bytealg/count_arm.s | 43 +++++++++++++++++++++++++++ src/internal/bytealg/count_generic.go | 2 +- src/internal/bytealg/count_native.go | 2 +- 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 src/internal/bytealg/count_arm.s diff --git a/src/internal/bytealg/count_arm.s b/src/internal/bytealg/count_arm.s new file mode 100644 index 0000000000..f704ea0c69 --- /dev/null +++ b/src/internal/bytealg/count_arm.s @@ -0,0 +1,43 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Count(SB),NOSPLIT,$0-20 + MOVW b_base+0(FP), R0 + MOVW b_len+4(FP), R1 + MOVBU c+12(FP), R2 + MOVW $ret+16(FP), R7 + B countbytebody<>(SB) + +TEXT ·CountString(SB),NOSPLIT,$0-16 + MOVW s_base+0(FP), R0 + MOVW s_len+4(FP), R1 + MOVBU c+8(FP), R2 + MOVW $ret+12(FP), R7 + B countbytebody<>(SB) + +// Input: +// R0: data +// R1: data length +// R2: byte to find +// R7: address to put result +// +// On exit: +// R4 and R8 are clobbered +TEXT countbytebody<>(SB),NOSPLIT,$0 + MOVW $0, R8 // R8 = count of byte to search + CMP $0, R1 + B.EQ done // short path to handle 0-byte case + ADD R0, R1 // R1 is the end of the range +byte_loop: + MOVBU.P 1(R0), R4 + CMP R4, R2 + ADD.EQ $1, R8 + CMP R0, R1 + B.NE byte_loop +done: + MOVW R8, (R7) + RET diff --git a/src/internal/bytealg/count_generic.go b/src/internal/bytealg/count_generic.go index e24b2b7fa0..13759ad496 100644 --- a/src/internal/bytealg/count_generic.go +++ b/src/internal/bytealg/count_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!arm64,!ppc64le,!ppc64 +// +build !amd64,!arm,!arm64,!ppc64le,!ppc64 package bytealg diff --git a/src/internal/bytealg/count_native.go b/src/internal/bytealg/count_native.go index e6a91b3c0e..52b2a461a4 100644 --- a/src/internal/bytealg/count_native.go +++ b/src/internal/bytealg/count_native.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm64 ppc64le ppc64 +// +build amd64 arm arm64 ppc64le ppc64 package bytealg -- 2.50.0