From: fanzha02 Date: Tue, 8 Aug 2017 06:13:43 +0000 (+0000) Subject: crypto/sha1: optimize arm64 sha1 implemention X-Git-Tag: go1.10beta1~286 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=44877315efba1c3e98ae7cde96fa46376fbaf0fd;p=gostls13.git crypto/sha1: optimize arm64 sha1 implemention Optimize with ARMv8 SHA1 instructions. Results (Cortex-A72) name old time/op new time/op delta Hash8Bytes-64 1.06µs ± 4% 0.56µs ± 4% -47.19% (p=0.008 n=5+5) Hash320Bytes-64 3.92µs ± 1% 0.82µs ± 2% -79.07% (p=0.008 n=5+5) Hash1K-64 10.2µs ± 2% 1.5µs ± 2% -85.71% (p=0.008 n=5+5) Hash8K-64 73.9µs ± 1% 7.6µs ± 1% -89.66% (p=0.008 n=5+5) name old speed new speed delta Hash8Bytes-64 7.55MB/s ± 4% 14.29MB/s ± 4% +89.27% (p=0.008 n=5+5) Hash320Bytes-64 81.6MB/s ± 1% 390.0MB/s ± 2% +377.64% (p=0.008 n=5+5) Hash1K-64 100MB/s ± 2% 701MB/s ± 2% +599.65% (p=0.008 n=5+5) Hash8K-64 111MB/s ± 1% 1072MB/s ± 1% +867.44% (p=0.008 n=5+5) Change-Id: I84397f980db9518f4150ac4c5ffa2c5a97a34444 Reviewed-on: https://go-review.googlesource.com/61550 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- diff --git a/src/crypto/sha1/sha1block_arm64.go b/src/crypto/sha1/sha1block_arm64.go new file mode 100644 index 0000000000..ab296e3793 --- /dev/null +++ b/src/crypto/sha1/sha1block_arm64.go @@ -0,0 +1,28 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sha1 + +import "internal/cpu" + +var k = []uint32{ + 0x5A827999, + 0x6ED9EBA1, + 0x8F1BBCDC, + 0xCA62C1D6, +} + +var hasSHA1 = cpu.ARM64.HasSHA1 + + +func sha1block(h []uint32, p []byte, k []uint32) + +func block(dig *digest, p []byte) { + if !hasSHA1 { + blockGeneric(dig, p) + } else { + h := dig.h[:] + sha1block(h, p, k) + } +} diff --git a/src/crypto/sha1/sha1block_arm64.s b/src/crypto/sha1/sha1block_arm64.s new file mode 100644 index 0000000000..4185cd60ba --- /dev/null +++ b/src/crypto/sha1/sha1block_arm64.s @@ -0,0 +1,152 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define HASHUPDATECHOOSE \ + SHA1C V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +#define HASHUPDATEPARITY \ + SHA1P V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +#define HASHUPDATEMAJ \ + SHA1M V16.S4, V1, V2 \ + SHA1H V3, V1 \ + VMOV V2.B16, V3.B16 + +// func sha1block(h []uint32, p []byte, k []uint32) +TEXT ·sha1block(SB),NOSPLIT,$0 + MOVD h_base+0(FP), R0 // hash value fisrt address + MOVD p_base+24(FP), R1 // message first address + MOVD k_base+48(FP), R2 // k constants first address + MOVD p_len+32(FP), R3 // message length + VLD1.P 16(R0), [V0.S4] + VMOVS (R0), V20 + SUB $16, R0, R0 + +blockloop: + + VLD1.P 16(R1), [V4.B16] // load message + VLD1.P 16(R1), [V5.B16] + VLD1.P 16(R1), [V6.B16] + VLD1.P 16(R1), [V7.B16] + VLD1 (R2), [V19.S4] // load constant k0-k79 + VMOV V0.B16, V2.B16 + VMOV V20.S[0], V1 + VMOV V2.B16, V3.B16 + VDUP V19.S[0], V17.S4 + VREV32 V4.B16, V4.B16 // prepare for using message in Byte format + VREV32 V5.B16, V5.B16 + VREV32 V6.B16, V6.B16 + VREV32 V7.B16, V7.B16 + + + VDUP V19.S[1], V18.S4 + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATECHOOSE + SHA1SU1 V7.S4, V4.S4 + + VADD V17.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATECHOOSE + SHA1SU1 V4.S4, V5.S4 + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATECHOOSE + SHA1SU1 V5.S4, V6.S4 + + VADD V17.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATECHOOSE + SHA1SU1 V6.S4, V7.S4 + + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATECHOOSE + SHA1SU1 V7.S4, V4.S4 + + VDUP V19.S[2], V17.S4 + VADD V18.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEPARITY + SHA1SU1 V4.S4, V5.S4 + + VADD V18.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEPARITY + SHA1SU1 V5.S4, V6.S4 + + VADD V18.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEPARITY + SHA1SU1 V6.S4, V7.S4 + + VADD V18.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATEPARITY + SHA1SU1 V7.S4, V4.S4 + + VADD V18.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEPARITY + SHA1SU1 V4.S4, V5.S4 + + VDUP V19.S[3], V18.S4 + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEMAJ + SHA1SU1 V5.S4, V6.S4 + + VADD V17.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEMAJ + SHA1SU1 V6.S4, V7.S4 + + VADD V17.S4, V4.S4, V16.S4 + SHA1SU0 V6.S4, V5.S4, V4.S4 + HASHUPDATEMAJ + SHA1SU1 V7.S4, V4.S4 + + VADD V17.S4, V5.S4, V16.S4 + SHA1SU0 V7.S4, V6.S4, V5.S4 + HASHUPDATEMAJ + SHA1SU1 V4.S4, V5.S4 + + VADD V17.S4, V6.S4, V16.S4 + SHA1SU0 V4.S4, V7.S4, V6.S4 + HASHUPDATEMAJ + SHA1SU1 V5.S4, V6.S4 + + VADD V18.S4, V7.S4, V16.S4 + SHA1SU0 V5.S4, V4.S4, V7.S4 + HASHUPDATEPARITY + SHA1SU1 V6.S4, V7.S4 + + VADD V18.S4, V4.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V5.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V6.S4, V16.S4 + HASHUPDATEPARITY + + VADD V18.S4, V7.S4, V16.S4 + HASHUPDATEPARITY + + SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes + VADD V2.S4, V0.S4, V0.S4 + VADD V1.S4, V20.S4, V20.S4 + CBNZ R3, blockloop + +sha1ret: + + VST1.P [V0.S4], 16(R0) // store hash value H(dcba) + VMOVS V20, (R0) // store hash value H(e) + RET diff --git a/src/crypto/sha1/sha1block_generic.go b/src/crypto/sha1/sha1block_generic.go index f0194626a6..5823e08941 100644 --- a/src/crypto/sha1/sha1block_generic.go +++ b/src/crypto/sha1/sha1block_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!amd64p32,!386,!arm,!s390x +// +build !amd64,!amd64p32,!386,!arm,!s390x,!arm64 package sha1