Cypherpunks repositories - gostls13.git/commitdiff
crypto/cipher: use Neon for xor on arm64
author: Meng Zhuo <mzh@golangcn.org>
Mon, 26 Oct 2020 07:57:33 +0000 (15:57 +0800)
committer: Filippo Valsorda <filippo@golang.org>
Sat, 7 Nov 2020 03:19:27 +0000 (03:19 +0000)
cpu: HiSilicon(R) Kirin 970 2.4GHz

name                 old time/op    new time/op    delta
XORBytes/8Bytes        39.8ns ± 0%    17.3ns ± 0%    -56.53%  (p=0.000 n=10+10)
XORBytes/128Bytes       376ns ± 0%      28ns ± 0%    -92.63%  (p=0.000 n=10+8)
XORBytes/2048Bytes     5.67µs ± 0%    0.22µs ± 0%    -96.03%  (p=0.000 n=10+10)
XORBytes/32768Bytes    90.3µs ± 0%     3.5µs ± 0%    -96.12%  (p=0.000 n=10+10)
AESGCMSeal1K            853ns ± 0%     853ns ± 0%       ~     (all equal)
AESGCMOpen1K            876ns ± 0%     874ns ± 0%     -0.23%  (p=0.000 n=10+10)
AESGCMSign8K           3.09µs ± 0%    3.08µs ± 0%     -0.34%  (p=0.000 n=10+9)
AESGCMSeal8K           5.87µs ± 0%    5.87µs ± 0%     +0.01%  (p=0.008 n=10+8)
AESGCMOpen8K           5.82µs ± 0%    5.82µs ± 0%     +0.02%  (p=0.037 n=10+10)
AESCFBEncrypt1K        7.05µs ± 0%    4.27µs ± 0%    -39.38%  (p=0.000 n=10+10)
AESCFBDecrypt1K        7.12µs ± 0%    4.30µs ± 0%    -39.54%  (p=0.000 n=10+9)
AESCFBDecrypt8K        56.7µs ± 0%    34.1µs ± 0%    -39.82%  (p=0.000 n=10+10)
AESOFB1K               5.20µs ± 0%    2.54µs ± 0%    -51.07%  (p=0.000 n=10+10)
AESCTR1K               4.96µs ± 0%    2.30µs ± 0%    -53.62%  (p=0.000 n=9+10)
AESCTR8K               39.5µs ± 0%    18.2µs ± 0%    -53.98%  (p=0.000 n=8+10)
AESCBCEncrypt1K        5.81µs ± 0%    3.07µs ± 0%    -47.13%  (p=0.000 n=10+8)
AESCBCDecrypt1K        5.83µs ± 0%    3.10µs ± 0%    -46.84%  (p=0.000 n=10+8)

name                 old speed      new speed      delta
XORBytes/8Bytes       201MB/s ± 0%   461MB/s ± 0%   +129.80%  (p=0.000 n=6+10)
XORBytes/128Bytes     340MB/s ± 0%  4625MB/s ± 0%  +1259.91%  (p=0.000 n=8+10)
XORBytes/2048Bytes    361MB/s ± 0%  9088MB/s ± 0%  +2414.23%  (p=0.000 n=8+10)
XORBytes/32768Bytes   363MB/s ± 0%  9350MB/s ± 0%  +2477.44%  (p=0.000 n=10+10)
AESGCMSeal1K         1.20GB/s ± 0%  1.20GB/s ± 0%     -0.02%  (p=0.041 n=10+10)
AESGCMOpen1K         1.17GB/s ± 0%  1.17GB/s ± 0%     +0.20%  (p=0.000 n=10+10)
AESGCMSign8K         2.65GB/s ± 0%  2.66GB/s ± 0%     +0.35%  (p=0.000 n=10+9)
AESGCMSeal8K         1.40GB/s ± 0%  1.40GB/s ± 0%     -0.01%  (p=0.000 n=10+7)
AESGCMOpen8K         1.41GB/s ± 0%  1.41GB/s ± 0%     -0.03%  (p=0.022 n=10+10)
AESCFBEncrypt1K       145MB/s ± 0%   238MB/s ± 0%    +64.95%  (p=0.000 n=10+10)
AESCFBDecrypt1K       143MB/s ± 0%   237MB/s ± 0%    +65.39%  (p=0.000 n=10+9)
AESCFBDecrypt8K       144MB/s ± 0%   240MB/s ± 0%    +66.15%  (p=0.000 n=10+10)
AESOFB1K              196MB/s ± 0%   401MB/s ± 0%   +104.35%  (p=0.000 n=9+10)
AESCTR1K              205MB/s ± 0%   443MB/s ± 0%   +115.57%  (p=0.000 n=7+10)
AESCTR8K              207MB/s ± 0%   450MB/s ± 0%   +117.27%  (p=0.000 n=10+10)
AESCBCEncrypt1K       176MB/s ± 0%   334MB/s ± 0%    +89.15%  (p=0.000 n=10+8)
AESCBCDecrypt1K       176MB/s ± 0%   330MB/s ± 0%    +88.08%  (p=0.000 n=10+9)

Updates #42010

Change-Id: I75e6d66fd0070e184d93b020c55a7580c713647c
Reviewed-on: https://go-review.googlesource.com/c/go/+/142537
Reviewed-by: Meng Zhuo <mzh@golangcn.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Run-TryBot: Meng Zhuo <mzh@golangcn.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Meng Zhuo <mzh@golangcn.org>

src/crypto/cipher/xor_arm64.go [new file with mode: 0644]
src/crypto/cipher/xor_arm64.s [new file with mode: 0644]
src/crypto/cipher/xor_generic.go

diff --git a/src/crypto/cipher/xor_arm64.go b/src/crypto/cipher/xor_arm64.go
new file mode 100644 (file)
index 0000000..35a785a
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cipher
+
+// xorBytes XORs the first n bytes of a and b into dst, where n is the smaller
+// of len(a) and len(b), and returns n. It panics if dst is shorter than n.
+func xorBytes(dst, a, b []byte) int {
+       n := len(a)
+       if len(b) < n {
+               n = len(b)
+       }
+       if n == 0 {
+               return 0 // nothing to XOR; also keeps the [0] and [n-1] indexing below in range
+       }
+       // Bounds check: indexing dst[n-1] panics here if dst holds fewer than n bytes.
+       _ = dst[n-1]
+
+       xorBytesARM64(&dst[0], &a[0], &b[0], n) // assembly routine; receives raw pointers plus the byte count
+       return n
+}
+
+func xorWords(dst, a, b []byte) {
+       xorBytes(dst, a, b) // delegates to xorBytes; the returned byte count is discarded
+}
+
+//go:noescape
+func xorBytesARM64(dst, a, b *byte, n int) // no Go body: implemented in assembly (xor_arm64.s)
diff --git a/src/crypto/cipher/xor_arm64.s b/src/crypto/cipher/xor_arm64.s
new file mode 100644 (file)
index 0000000..669852d
--- /dev/null
@@ -0,0 +1,67 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func xorBytesARM64(dst, a, b *byte, n int) -- stores a[i]^b[i] into dst[i] for the n bytes given
+TEXT ·xorBytesARM64(SB), NOSPLIT|NOFRAME, $0
+       MOVD    dst+0(FP), R0 // R0 = dst, output pointer
+       MOVD    a+8(FP), R1 // R1 = a, first input pointer
+       MOVD    b+16(FP), R2 // R2 = b, second input pointer
+       MOVD    n+24(FP), R3 // R3 = n, bytes still to process
+       CMP     $64, R3
+       BLT     tail // fewer than 64 bytes: skip the 64-byte loop
+loop_64:
+       VLD1.P  64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] // load 64 bytes of a, then R1 += 64
+       VLD1.P  64(R2), [V4.B16, V5.B16, V6.B16, V7.B16] // load 64 bytes of b, then R2 += 64
+       VEOR    V0.B16, V4.B16, V4.B16 // V4..V7 = a ^ b, 16 bytes per register
+       VEOR    V1.B16, V5.B16, V5.B16
+       VEOR    V2.B16, V6.B16, V6.B16
+       VEOR    V3.B16, V7.B16, V7.B16
+       VST1.P  [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0) // store 64 result bytes, then R0 += 64
+       SUBS    $64, R3 // n -= 64; NOTE(review): flags set here are overwritten by the CMP below, plain SUB may suffice -- confirm
+       CMP     $64, R3
+       BGE     loop_64 // keep looping while at least 64 bytes remain
+tail:
+       // quick end: R3 < 64 here; return at once if nothing is left
+       CBZ     R3, end
+       TBZ     $5, R3, less_than32 // bit 5 of n clear: no 32-byte chunk to do
+       VLD1.P  32(R1), [V0.B16, V1.B16] // 32 bytes of a, R1 += 32
+       VLD1.P  32(R2), [V2.B16, V3.B16] // 32 bytes of b, R2 += 32
+       VEOR    V0.B16, V2.B16, V2.B16
+       VEOR    V1.B16, V3.B16, V3.B16
+       VST1.P  [V2.B16, V3.B16], 32(R0) // store 32 result bytes, R0 += 32
+less_than32:
+       TBZ     $4, R3, less_than16 // bit 4 of n clear: no 16-byte chunk to do
+       LDP.P   16(R1), (R11, R12) // 16 bytes of a as two 64-bit words, R1 += 16
+       LDP.P   16(R2), (R13, R14) // 16 bytes of b as two 64-bit words, R2 += 16
+       EOR     R11, R13, R13
+       EOR     R12, R14, R14
+       STP.P   (R13, R14), 16(R0) // store 16 result bytes, R0 += 16
+less_than16:
+       TBZ     $3, R3, less_than8 // bit 3 of n clear: no 8-byte chunk to do
+       MOVD.P  8(R1), R11 // 8 bytes of a, R1 += 8
+       MOVD.P  8(R2), R12 // 8 bytes of b, R2 += 8
+       EOR     R11, R12, R12
+       MOVD.P  R12, 8(R0) // store 8 result bytes, R0 += 8
+less_than8:
+       TBZ     $2, R3, less_than4 // bit 2 of n clear: no 4-byte chunk to do
+       MOVWU.P 4(R1), R13 // 4 bytes of a, R1 += 4
+       MOVWU.P 4(R2), R14 // 4 bytes of b, R2 += 4
+       EORW    R13, R14, R14
+       MOVWU.P R14, 4(R0) // store 4 result bytes, R0 += 4
+less_than4:
+       TBZ     $1, R3, less_than2 // bit 1 of n clear: no 2-byte chunk to do
+       MOVHU.P 2(R1), R15 // 2 bytes of a, R1 += 2
+       MOVHU.P 2(R2), R16 // 2 bytes of b, R2 += 2
+       EORW    R15, R16, R16
+       MOVHU.P R16, 2(R0) // store 2 result bytes, R0 += 2
+less_than2:
+       TBZ     $0, R3, end // bit 0 of n clear: no final odd byte
+       MOVBU   (R1), R17 // last byte of a; no post-increment, nothing follows
+       MOVBU   (R2), R19 // last byte of b; NOTE(review): R18 is skipped, presumably as the platform register -- confirm
+       EORW    R17, R19, R19
+       MOVBU   R19, (R0) // store the last result byte
+end:
+       RET
index b7de60873c5608223b7fc3d34f239d4243bd39fd..ca9c4bbf39197a0cf5ad21dcb319e5cdf6c84b36 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64,!ppc64,!ppc64le
+// +build !amd64,!ppc64,!ppc64le,!arm64
 
 package cipher