]> Cypherpunks repositories - gostls13.git/commitdiff
crypto/sha1: optimize arm64 sha1 implemention
authorfanzha02 <fannie.zhang@arm.com>
Tue, 8 Aug 2017 06:13:43 +0000 (06:13 +0000)
committerBrad Fitzpatrick <bradfitz@golang.org>
Tue, 14 Nov 2017 18:17:59 +0000 (18:17 +0000)
Optimize with ARMv8 SHA1 instructions.
Results (Cortex-A72)

name             old time/op    new time/op     delta
Hash8Bytes-64      1.06µs ± 4%     0.56µs ± 4%   -47.19%  (p=0.008 n=5+5)
Hash320Bytes-64    3.92µs ± 1%     0.82µs ± 2%   -79.07%  (p=0.008 n=5+5)
Hash1K-64          10.2µs ± 2%      1.5µs ± 2%   -85.71%  (p=0.008 n=5+5)
Hash8K-64          73.9µs ± 1%      7.6µs ± 1%   -89.66%  (p=0.008 n=5+5)

name             old speed      new speed       delta
Hash8Bytes-64    7.55MB/s ± 4%  14.29MB/s ± 4%   +89.27%  (p=0.008 n=5+5)
Hash320Bytes-64  81.6MB/s ± 1%  390.0MB/s ± 2%  +377.64%  (p=0.008 n=5+5)
Hash1K-64         100MB/s ± 2%    701MB/s ± 2%  +599.65%  (p=0.008 n=5+5)
Hash8K-64         111MB/s ± 1%   1072MB/s ± 1%  +867.44%  (p=0.008 n=5+5)

Change-Id: I84397f980db9518f4150ac4c5ffa2c5a97a34444
Reviewed-on: https://go-review.googlesource.com/61550
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/crypto/sha1/sha1block_arm64.go [new file with mode: 0644]
src/crypto/sha1/sha1block_arm64.s [new file with mode: 0644]
src/crypto/sha1/sha1block_generic.go

diff --git a/src/crypto/sha1/sha1block_arm64.go b/src/crypto/sha1/sha1block_arm64.go
new file mode 100644 (file)
index 0000000..ab296e3
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sha1
+
+import "internal/cpu"
+
+var k = []uint32{
+       0x5A827999,
+       0x6ED9EBA1,
+       0x8F1BBCDC,
+       0xCA62C1D6,
+}
+
+var hasSHA1 = cpu.ARM64.HasSHA1
+
+
+func sha1block(h []uint32, p []byte, k []uint32)
+
+func block(dig *digest, p []byte) {
+       if !hasSHA1 {
+               blockGeneric(dig, p)
+       } else {
+               h := dig.h[:]
+               sha1block(h, p, k)
+       }
+}
diff --git a/src/crypto/sha1/sha1block_arm64.s b/src/crypto/sha1/sha1block_arm64.s
new file mode 100644 (file)
index 0000000..4185cd6
--- /dev/null
@@ -0,0 +1,152 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define HASHUPDATECHOOSE \
+       SHA1C   V16.S4, V1, V2 \
+       SHA1H   V3, V1 \
+       VMOV    V2.B16, V3.B16
+
+#define HASHUPDATEPARITY \
+       SHA1P   V16.S4, V1, V2 \
+       SHA1H   V3, V1 \
+       VMOV    V2.B16, V3.B16
+
+#define HASHUPDATEMAJ \
+       SHA1M   V16.S4, V1, V2 \
+       SHA1H   V3, V1 \
+       VMOV    V2.B16, V3.B16
+
+// func sha1block(h []uint32, p []byte, k []uint32)
+TEXT ·sha1block(SB),NOSPLIT,$0
+       MOVD    h_base+0(FP), R0                             // hash value fisrt address
+       MOVD    p_base+24(FP), R1                            // message first address
+       MOVD    k_base+48(FP), R2                            // k constants first address
+       MOVD    p_len+32(FP), R3                             // message length
+       VLD1.P  16(R0), [V0.S4]
+       VMOVS   (R0), V20
+       SUB     $16, R0, R0
+
+blockloop:
+
+       VLD1.P  16(R1), [V4.B16]                             // load message
+       VLD1.P  16(R1), [V5.B16]
+       VLD1.P  16(R1), [V6.B16]
+       VLD1.P  16(R1), [V7.B16]
+       VLD1    (R2), [V19.S4]                               // load constant k0-k79
+       VMOV    V0.B16, V2.B16
+       VMOV    V20.S[0], V1
+       VMOV    V2.B16, V3.B16
+       VDUP    V19.S[0], V17.S4
+       VREV32  V4.B16, V4.B16                               // prepare for using message in Byte format
+       VREV32  V5.B16, V5.B16
+       VREV32  V6.B16, V6.B16
+       VREV32  V7.B16, V7.B16
+
+
+       VDUP    V19.S[1], V18.S4
+       VADD    V17.S4, V4.S4, V16.S4
+       SHA1SU0 V6.S4, V5.S4, V4.S4
+       HASHUPDATECHOOSE
+       SHA1SU1 V7.S4, V4.S4
+
+       VADD    V17.S4, V5.S4, V16.S4
+       SHA1SU0 V7.S4, V6.S4, V5.S4
+       HASHUPDATECHOOSE
+       SHA1SU1 V4.S4, V5.S4
+       VADD    V17.S4, V6.S4, V16.S4
+       SHA1SU0 V4.S4, V7.S4, V6.S4
+       HASHUPDATECHOOSE
+       SHA1SU1 V5.S4, V6.S4
+
+       VADD    V17.S4, V7.S4, V16.S4
+       SHA1SU0 V5.S4, V4.S4, V7.S4
+       HASHUPDATECHOOSE
+       SHA1SU1 V6.S4, V7.S4
+
+       VADD    V17.S4, V4.S4, V16.S4
+       SHA1SU0 V6.S4, V5.S4, V4.S4
+       HASHUPDATECHOOSE
+       SHA1SU1 V7.S4, V4.S4
+
+       VDUP    V19.S[2], V17.S4
+       VADD    V18.S4, V5.S4, V16.S4
+       SHA1SU0 V7.S4, V6.S4, V5.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V4.S4, V5.S4
+
+       VADD    V18.S4, V6.S4, V16.S4
+       SHA1SU0 V4.S4, V7.S4, V6.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V5.S4, V6.S4
+
+       VADD    V18.S4, V7.S4, V16.S4
+       SHA1SU0 V5.S4, V4.S4, V7.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V6.S4, V7.S4
+
+       VADD    V18.S4, V4.S4, V16.S4
+       SHA1SU0 V6.S4, V5.S4, V4.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V7.S4, V4.S4
+
+       VADD    V18.S4, V5.S4, V16.S4
+       SHA1SU0 V7.S4, V6.S4, V5.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V4.S4, V5.S4
+
+       VDUP    V19.S[3], V18.S4
+       VADD    V17.S4, V6.S4, V16.S4
+       SHA1SU0 V4.S4, V7.S4, V6.S4
+       HASHUPDATEMAJ
+       SHA1SU1 V5.S4, V6.S4
+
+       VADD    V17.S4, V7.S4, V16.S4
+       SHA1SU0 V5.S4, V4.S4, V7.S4
+       HASHUPDATEMAJ
+       SHA1SU1 V6.S4, V7.S4
+
+       VADD    V17.S4, V4.S4, V16.S4
+       SHA1SU0 V6.S4, V5.S4, V4.S4
+       HASHUPDATEMAJ
+       SHA1SU1 V7.S4, V4.S4
+
+       VADD    V17.S4, V5.S4, V16.S4
+       SHA1SU0 V7.S4, V6.S4, V5.S4
+       HASHUPDATEMAJ
+       SHA1SU1 V4.S4, V5.S4
+
+       VADD    V17.S4, V6.S4, V16.S4
+       SHA1SU0 V4.S4, V7.S4, V6.S4
+       HASHUPDATEMAJ
+       SHA1SU1 V5.S4, V6.S4
+
+       VADD    V18.S4, V7.S4, V16.S4
+       SHA1SU0 V5.S4, V4.S4, V7.S4
+       HASHUPDATEPARITY
+       SHA1SU1 V6.S4, V7.S4
+
+       VADD    V18.S4, V4.S4, V16.S4
+       HASHUPDATEPARITY
+
+       VADD    V18.S4, V5.S4, V16.S4
+       HASHUPDATEPARITY
+
+       VADD    V18.S4, V6.S4, V16.S4
+       HASHUPDATEPARITY
+
+       VADD    V18.S4, V7.S4, V16.S4
+       HASHUPDATEPARITY
+
+       SUB     $64, R3, R3                                  // message length - 64bytes, then compare with 64bytes
+       VADD    V2.S4, V0.S4, V0.S4
+       VADD    V1.S4, V20.S4, V20.S4
+       CBNZ    R3, blockloop
+
+sha1ret:
+
+       VST1.P  [V0.S4], 16(R0)                               // store hash value H(dcba)
+       VMOVS   V20, (R0)                                     // store hash value H(e)
+       RET
index f0194626a6f24dca6b24f4998ab991767b9e1217..5823e0894104196e3a33e66b0e241ed02410818a 100644 (file)
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64,!amd64p32,!386,!arm,!s390x
+// +build !amd64,!amd64p32,!386,!arm,!s390x,!arm64
 
 package sha1