]> Cypherpunks repositories - gostls13.git/commitdiff
internal/bytealg: add assembly implementation of Compare/CompareString on mips64x
authorMeng Zhuo <mengzhuo1203@gmail.com>
Wed, 25 Sep 2019 16:00:20 +0000 (00:00 +0800)
committerCherry Zhang <cherryyz@google.com>
Wed, 25 Sep 2019 22:03:38 +0000 (22:03 +0000)
name                         old time/op    new time/op    delta
BytesCompare/1                 28.9ns ± 4%    22.8ns ± 0%    -21.23%  (p=0.000 n=9+10)
BytesCompare/2                 34.6ns ± 0%    23.5ns ± 0%    -32.01%  (p=0.000 n=8+10)
BytesCompare/4                 54.6ns ± 0%    41.4ns ± 0%    -24.18%  (p=0.001 n=8+9)
BytesCompare/8                 73.9ns ± 0%    49.7ns ± 0%    -32.75%  (p=0.002 n=8+10)
BytesCompare/16                 113ns ± 0%      23ns ± 0%    -79.56%  (p=0.000 n=9+10)
BytesCompare/32                 190ns ± 0%      26ns ± 0%    -86.53%  (p=0.000 n=10+10)
BytesCompare/64                 345ns ± 0%      44ns ± 0%    -87.19%  (p=0.000 n=10+10)
BytesCompare/128                654ns ± 0%      52ns ± 0%    -91.97%  (p=0.000 n=9+8)
BytesCompare/256               1.27µs ± 0%    0.08µs ± 1%    -94.10%  (p=0.000 n=8+10)
BytesCompare/512               2.51µs ± 0%    0.12µs ± 0%    -95.26%  (p=0.000 n=9+9)
BytesCompare/1024              4.99µs ± 0%    0.21µs ± 1%    -95.84%  (p=0.000 n=8+10)
BytesCompare/2048              9.94µs ± 0%    0.38µs ± 0%    -96.13%  (p=0.000 n=8+10)
CompareBytesEqual               105ns ± 0%      64ns ± 0%    -39.05%  (p=0.000 n=10+9)
CompareBytesToNil              34.8ns ± 1%    39.5ns ± 3%    +13.48%  (p=0.000 n=10+10)
CompareBytesEmpty              33.6ns ± 3%    36.6ns ± 0%     +8.77%  (p=0.000 n=10+10)
CompareBytesIdentical          29.7ns ± 0%    36.6ns ± 0%    +23.23%  (p=0.000 n=10+10)
CompareBytesSameLength         69.1ns ± 0%    51.1ns ± 0%    -26.05%  (p=0.000 n=10+10)
CompareBytesDifferentLength    69.8ns ± 0%    51.1ns ± 0%    -26.79%  (p=0.000 n=10+10)
CompareBytesBigUnaligned       5.15ms ± 0%    2.18ms ± 0%    -57.62%  (p=0.000 n=9+9)
CompareBytesBig                5.28ms ± 0%    0.28ms ± 0%    -94.64%  (p=0.000 n=8+10)
CompareBytesBigIdentical       29.7ns ± 0%    36.8ns ± 0%    +23.91%  (p=0.000 n=8+8)

name                         old speed      new speed      delta
CompareBytesBigUnaligned      204MB/s ± 0%   480MB/s ± 0%   +135.94%  (p=0.000 n=9+9)
CompareBytesBig               198MB/s ± 0%  3703MB/s ± 0%  +1765.85%  (p=0.000 n=8+10)
CompareBytesBigIdentical     35.3TB/s ± 0%  28.5TB/s ± 0%    -19.31%  (p=0.000 n=8+8)

Change-Id: I112d9de2324986fd65ed237a86b11856a1c0f4a7
Reviewed-on: https://go-review.googlesource.com/c/go/+/196837
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/internal/bytealg/compare_generic.go
src/internal/bytealg/compare_mips64x.s [new file with mode: 0644]
src/internal/bytealg/compare_native.go

index 2ac60f3df939d9c2a3e47cb83d735061c81ccc7d..4839df9528f0828bbe93b9fddf3e5aaf1cad3d90 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm
+// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
 
 package bytealg
 
diff --git a/src/internal/bytealg/compare_mips64x.s b/src/internal/bytealg/compare_mips64x.s
new file mode 100644 (file)
index 0000000..7bed4d2
--- /dev/null
@@ -0,0 +1,92 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-56
+       MOVV    a_base+0(FP), R3
+       MOVV    b_base+24(FP), R4
+       MOVV    a_len+8(FP), R1
+       MOVV    b_len+32(FP), R2
+       MOVV    $ret+48(FP), R9
+       JMP     cmpbody<>(SB)
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
+       MOVV    a_base+0(FP), R3
+       MOVV    b_base+16(FP), R4
+       MOVV    a_len+8(FP), R1
+       MOVV    b_len+24(FP), R2
+       MOVV    $ret+32(FP), R9
+       JMP     cmpbody<>(SB)
+
+// On entry:
+// R1 length of a
+// R2 length of b
+// R3 points to the start of a
+// R4 points to the start of b
+// R9 points to the return value (-1/0/1)
+//
+// On exit:
+// R6, R7, R8, R10, R11, R13, R14 clobbered
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
+       BEQ     R3, R4, samebytes // same start of a and b
+
+       SGTU    R1, R2, R7
+       BNE     R0, R7, r2_lt_r1
+       MOVV    R1, R10
+       JMP     entry
+r2_lt_r1:
+       MOVV    R2, R10 // R10 is min(R1, R2)
+entry:
+       ADDV    R3, R10, R8     // R3 start of a, R8 end of a
+       BEQ     R3, R8, samebytes // length is 0
+
+       SRLV    $4, R10         // R10 is number of chunks
+       BEQ     R0, R10, byte_loop
+
+       // make sure both a and b are aligned.
+       OR      R3, R4, R11
+       AND     $7, R11
+       BNE     R0, R11, byte_loop
+
+chunk16_loop:
+       BEQ     R0, R10, byte_loop
+       MOVV    (R3), R6
+       MOVV    (R4), R7
+       BNE     R6, R7, byte_cmp
+       MOVV    8(R3), R13
+       MOVV    8(R4), R14
+       ADDV    $16, R3
+       ADDV    $16, R4
+       SUBVU   $1, R10
+       BEQ     R13, R14, chunk16_loop
+       MOVV    R13, R6
+       MOVV    R14, R7
+       JMP     byte_cmp
+
+byte_loop:
+       BEQ     R3, R8, samebytes
+       MOVBU   (R3), R6
+       ADDVU   $1, R3
+       MOVBU   (R4), R7
+       ADDVU   $1, R4
+       BEQ     R6, R7, byte_loop
+
+byte_cmp:
+       SGTU    R6, R7, R8 // R8 = 1 if (R6 > R7)
+       BNE     R0, R8, ret
+       MOVV    $-1, R8
+       JMP     ret
+
+samebytes:
+       SGTU    R1, R2, R6
+       SGTU    R2, R1, R7
+       SUBV    R7, R6, R8
+
+ret:
+       MOVV    R8, (R9)
+       RET
index b14aa8c72c94cb7af10c0ac16bc01af7a4c1478b..051b2ae07918172105bcc27735ce68dcad7f28a2 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm
+// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle mips64 mips64le wasm
 
 package bytealg