]> Cypherpunks repositories - gostls13.git/commitdiff
internal/bytealg: (re)adding mips64x compare implementation
authorMeng Zhuo <mengzhuo1203@gmail.com>
Wed, 2 Oct 2019 08:06:25 +0000 (16:06 +0800)
committerCherry Zhang <cherryyz@google.com>
Thu, 3 Oct 2019 20:32:21 +0000 (20:32 +0000)
The original CL of mips64x compare function has been reverted due to wrong
implement for little endian.
Original CL: https://go-review.googlesource.com/c/go/+/196837

name                         old time/op    new time/op    delta
BytesCompare/1                 28.9ns ± 4%    22.1ns ± 0%    -23.60%  (p=0.000 n=9+8)
BytesCompare/2                 34.6ns ± 0%    23.1ns ± 0%    -33.25%  (p=0.000 n=8+10)
BytesCompare/4                 54.6ns ± 0%    40.8ns ± 0%    -25.27%  (p=0.000 n=8+8)
BytesCompare/8                 73.9ns ± 0%    49.1ns ± 0%    -33.56%  (p=0.000 n=8+8)
BytesCompare/16                 113ns ± 0%      24ns ± 0%    -79.20%  (p=0.000 n=9+9)
BytesCompare/32                 190ns ± 0%      26ns ± 0%    -86.53%  (p=0.000 n=10+10)
BytesCompare/64                 345ns ± 0%      44ns ± 0%    -87.19%  (p=0.000 n=10+8)
BytesCompare/128                654ns ± 0%      52ns ± 0%    -91.97%  (p=0.000 n=9+8)
BytesCompare/256               1.27µs ± 0%    0.07µs ± 0%    -94.14%  (p=0.001 n=8+9)
BytesCompare/512               2.51µs ± 0%    0.12µs ± 0%    -95.26%  (p=0.000 n=9+10)
BytesCompare/1024              4.99µs ± 0%    0.21µs ± 0%    -95.85%  (p=0.000 n=8+10)
BytesCompare/2048              9.94µs ± 0%    0.38µs ± 0%    -96.14%  (p=0.000 n=8+8)
CompareBytesEqual               105ns ± 0%      64ns ± 0%    -39.43%  (p=0.000 n=10+9)
CompareBytesToNil              34.8ns ± 1%    38.6ns ± 3%    +11.01%  (p=0.000 n=10+10)
CompareBytesEmpty              33.6ns ± 3%    36.6ns ± 0%     +8.77%  (p=0.000 n=10+8)
CompareBytesIdentical          29.7ns ± 0%    40.5ns ± 1%    +36.45%  (p=0.000 n=10+8)
CompareBytesSameLength         69.1ns ± 0%    51.8ns ± 0%    -25.04%  (p=0.000 n=10+9)
CompareBytesDifferentLength    69.8ns ± 0%    52.5ns ± 0%    -24.79%  (p=0.000 n=10+8)
CompareBytesBigUnaligned       5.15ms ± 0%    2.19ms ± 0%    -57.59%  (p=0.000 n=9+9)
CompareBytesBig                5.28ms ± 0%    0.28ms ± 0%    -94.64%  (p=0.000 n=8+8)
CompareBytesBigIdentical       29.7ns ± 0%    36.9ns ± 2%    +24.11%  (p=0.000 n=8+10)

name                         old speed      new speed      delta
CompareBytesBigUnaligned      204MB/s ± 0%   480MB/s ± 0%   +135.77%  (p=0.000 n=9+9)
CompareBytesBig               198MB/s ± 0%  3704MB/s ± 0%  +1765.97%  (p=0.000 n=8+8)
CompareBytesBigIdentical     35.3TB/s ± 0%  28.4TB/s ± 2%    -19.44%  (p=0.000 n=8+10)

Fixes #34549

Change-Id: I2ef29f13cdd4229745ac2d018bb53c76f2ff1209
Reviewed-on: https://go-review.googlesource.com/c/go/+/197557
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/internal/bytealg/compare_generic.go
src/internal/bytealg/compare_mips64x.s [new file with mode: 0644]
src/internal/bytealg/compare_native.go

index 2ac60f3df939d9c2a3e47cb83d735061c81ccc7d..4839df9528f0828bbe93b9fddf3e5aaf1cad3d90 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm
+// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
 
 package bytealg
 
diff --git a/src/internal/bytealg/compare_mips64x.s b/src/internal/bytealg/compare_mips64x.s
new file mode 100644 (file)
index 0000000..4f05fce
--- /dev/null
@@ -0,0 +1,88 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-56
+       MOVV    a_base+0(FP), R3
+       MOVV    b_base+24(FP), R4
+       MOVV    a_len+8(FP), R1
+       MOVV    b_len+32(FP), R2
+       MOVV    $ret+48(FP), R9
+       JMP     cmpbody<>(SB)
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
+       MOVV    a_base+0(FP), R3
+       MOVV    b_base+16(FP), R4
+       MOVV    a_len+8(FP), R1
+       MOVV    b_len+24(FP), R2
+       MOVV    $ret+32(FP), R9
+       JMP     cmpbody<>(SB)
+
+// On entry:
+// R1 length of a
+// R2 length of b
+// R3 points to the start of a
+// R4 points to the start of b
+// R9 points to the return value (-1/0/1)
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
+       BEQ     R3, R4, samebytes // same start of a and b
+
+       SGTU    R1, R2, R7
+       BNE     R0, R7, r2_lt_r1
+       MOVV    R1, R10
+       JMP     entry
+r2_lt_r1:
+       MOVV    R2, R10 // R10 is min(R1, R2)
+entry:
+       ADDV    R3, R10, R8     // R3 start of a, R8 end of a
+       BEQ     R3, R8, samebytes // length is 0
+
+       SRLV    $4, R10         // R10 is number of chunks
+       BEQ     R0, R10, byte_loop
+
+       // make sure both a and b are aligned.
+       OR      R3, R4, R11
+       AND     $7, R11
+       BNE     R0, R11, byte_loop
+
+chunk16_loop:
+       BEQ     R0, R10, byte_loop
+       MOVV    (R3), R6
+       MOVV    (R4), R7
+       BNE     R6, R7, byte_loop
+       MOVV    8(R3), R13
+       MOVV    8(R4), R14
+       ADDV    $16, R3
+       ADDV    $16, R4
+       SUBVU   $1, R10
+       BEQ     R13, R14, chunk16_loop
+       SUBV    $8, R3
+       SUBV    $8, R4
+
+byte_loop:
+       BEQ     R3, R8, samebytes
+       MOVBU   (R3), R6
+       ADDVU   $1, R3
+       MOVBU   (R4), R7
+       ADDVU   $1, R4
+       BEQ     R6, R7, byte_loop
+
+byte_cmp:
+       SGTU    R6, R7, R8 // R8 = 1 if (R6 > R7)
+       BNE     R0, R8, ret
+       MOVV    $-1, R8
+       JMP     ret
+
+samebytes:
+       SGTU    R1, R2, R6
+       SGTU    R2, R1, R7
+       SUBV    R7, R6, R8
+
+ret:
+       MOVV    R8, (R9)
+       RET
index b14aa8c72c94cb7af10c0ac16bc01af7a4c1478b..95486e85429c1bd35cd07f2ad1774744710a3793 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm
+// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm mips64 mips64le
 
 package bytealg