From: Julian Zhu Date: Tue, 3 Jun 2025 17:11:15 +0000 (+0800) Subject: crypto/sha512: use const table for key loading on loong64 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=17a8be71178dab07438cfffb84d2588f2a66bea1;p=gostls13.git crypto/sha512: use const table for key loading on loong64 Load constant keys from a static memory table rather than loading immediates into registers on loong64. Benchmark for Loongson-3A5000: goos: linux goarch: loong64 pkg: crypto/sha512 cpu: Loongson-3A5000-HV @ 2500.00MHz │ sha512o │ sha512n │ │ sec/op │ sec/op vs base │ Hash8Bytes/New-4 489.1n ± 0% 464.7n ± 0% -5.00% (p=0.000 n=8) Hash8Bytes/Sum384-4 499.1n ± 0% 474.6n ± 0% -4.92% (p=0.000 n=8) Hash8Bytes/Sum512-4 506.6n ± 0% 481.9n ± 0% -4.86% (p=0.000 n=8) Hash1K/New-4 3.371µ ± 0% 3.152µ ± 0% -6.51% (p=0.000 n=8) Hash1K/Sum384-4 3.385µ ± 0% 3.164µ ± 0% -6.53% (p=0.000 n=8) Hash1K/Sum512-4 3.392µ ± 0% 3.170µ ± 0% -6.54% (p=0.000 n=8) Hash8K/New-4 23.62µ ± 0% 22.01µ ± 0% -6.82% (p=0.000 n=8) Hash8K/Sum384-4 23.63µ ± 0% 22.02µ ± 0% -6.82% (p=0.000 n=8) Hash8K/Sum512-4 23.64µ ± 0% 22.02µ ± 0% -6.86% (p=0.000 n=8) geomean 3.415µ 3.207µ -6.10% │ sha512o │ sha512n │ │ B/s │ B/s vs base │ Hash8Bytes/New-4 15.60Mi ± 0% 16.42Mi ± 0% +5.29% (p=0.000 n=8) Hash8Bytes/Sum384-4 15.29Mi ± 0% 16.08Mi ± 0% +5.18% (p=0.000 n=8) Hash8Bytes/Sum512-4 15.06Mi ± 0% 15.83Mi ± 0% +5.13% (p=0.000 n=8) Hash1K/New-4 289.7Mi ± 0% 309.9Mi ± 0% +6.97% (p=0.000 n=8) Hash1K/Sum384-4 288.5Mi ± 0% 308.6Mi ± 0% +6.97% (p=0.000 n=8) Hash1K/Sum512-4 287.9Mi ± 0% 308.0Mi ± 0% +7.00% (p=0.000 n=8) Hash8K/New-4 330.8Mi ± 0% 355.0Mi ± 0% +7.32% (p=0.000 n=8) Hash8K/Sum384-4 330.6Mi ± 0% 354.9Mi ± 0% +7.32% (p=0.000 n=8) Hash8K/Sum512-4 330.5Mi ± 0% 354.8Mi ± 0% +7.36% (p=0.000 n=8) geomean 113.5Mi 120.9Mi +6.50% Benchmark for Loongson-3A6000: goos: linux goarch: loong64 pkg: crypto/sha512 cpu: Loongson-3A6000 @ 2500.00MHz │ sha512.old │ sha512.new │ │ sec/op │ sec/op vs base │ Hash8Bytes/New-8 397.2n ± 0% 380.6n ± 0% -4.17% (p=0.000 n=10) Hash8Bytes/Sum384-8 406.1n ± 0% 397.9n ± 0% -2.02% (p=0.000 n=10) Hash8Bytes/Sum512-8 410.1n ± 0% 395.8n ± 1% -3.50% (p=0.000 n=10) Hash1K/New-8 2.932µ ± 0% 2.800µ ± 0% -4.50% (p=0.000 n=10) Hash1K/Sum384-8 2.941µ ± 0% 2.812µ ± 0% -4.39% (p=0.000 n=10) Hash1K/Sum512-8 2.947µ ± 0% 2.814µ ± 0% -4.50% (p=0.000 n=10) Hash8K/New-8 20.68µ ± 0% 19.73µ ± 1% -4.58% (p=0.000 n=10) Hash8K/Sum384-8 20.69µ ± 0% 19.73µ ± 0% -4.62% (p=0.000 n=10) Hash8K/Sum512-8 20.70µ ± 0% 19.75µ ± 0% -4.60% (p=0.000 n=10) geomean 2.908µ 2.789µ -4.10% │ sha512.old │ sha512.new │ │ B/s │ B/s vs base │ Hash8Bytes/New-8 19.21Mi ± 0% 20.05Mi ± 0% +4.37% (p=0.000 n=10) Hash8Bytes/Sum384-8 18.79Mi ± 0% 19.18Mi ± 0% +2.08% (p=0.000 n=10) Hash8Bytes/Sum512-8 18.60Mi ± 0% 19.28Mi ± 1% +3.64% (p=0.000 n=10) Hash1K/New-8 333.1Mi ± 0% 348.8Mi ± 0% +4.71% (p=0.000 n=10) Hash1K/Sum384-8 332.0Mi ± 0% 347.3Mi ± 0% +4.60% (p=0.000 n=10) Hash1K/Sum512-8 331.5Mi ± 0% 347.0Mi ± 0% +4.69% (p=0.000 n=10) Hash8K/New-8 377.8Mi ± 0% 396.0Mi ± 1% +4.80% (p=0.000 n=10) Hash8K/Sum384-8 377.7Mi ± 0% 396.0Mi ± 0% +4.85% (p=0.000 n=10) Hash8K/Sum512-8 377.5Mi ± 0% 395.7Mi ± 0% +4.82% (p=0.000 n=10) geomean 133.3Mi 139.0Mi +4.28% Change-Id: I55ae4a8e4b0c51a98583f654158235fe738cf348 Reviewed-on: https://go-review.googlesource.com/c/go/+/678436 Reviewed-by: sophie zhao Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc --- diff --git a/src/crypto/internal/fips140/sha512/sha512block_loong64.s b/src/crypto/internal/fips140/sha512/sha512block_loong64.s index f65d563ca3..751ab4e4f6 100644 --- a/src/crypto/internal/fips140/sha512/sha512block_loong64.s +++ b/src/crypto/internal/fips140/sha512/sha512block_loong64.s @@ -14,6 +14,7 @@ #define REGTMP3 R18 #define REGTMP4 R7 #define REGTMP5 R6 +#define REG_KT R19 // W[i] = M[i]; for 0 <= i <= 15 #define LOAD0(index) \ @@ -52,8 +53,9 @@ // Ch(x, y, z) = (x AND y) XOR (NOT x AND z) // = ((y XOR z) AND x) XOR z // Calculate T1 in REGTMP4 -#define SHA512T1(const, e, f, g, h) \ - ADDV $const, h; \ +#define SHA512T1(index, e, f, g, h) \ + MOVV (index*8)(REG_KT), REGTMP5; \ + ADDV REGTMP5, h; \ ADDV REGTMP4, h; \ ROTRV $14, e, REGTMP5; \ ROTRV $18, e, REGTMP; \ @@ -85,19 +87,19 @@ // Calculate T1 and T2, then e = d + T1 and a = T1 + T2. // The values for e and a are stored in d and h, ready for rotation. -#define SHA512ROUND(const, a, b, c, d, e, f, g, h) \ - SHA512T1(const, e, f, g, h); \ +#define SHA512ROUND(index, a, b, c, d, e, f, g, h) \ + SHA512T1(index, e, f, g, h); \ SHA512T2(a, b, c); \ ADDV REGTMP4, d; \ ADDV REGTMP1, REGTMP4, h -#define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \ +#define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \ LOAD0(index); \ - SHA512ROUND(const, a, b, c, d, e, f, g, h) + SHA512ROUND(index, a, b, c, d, e, f, g, h) -#define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \ +#define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \ LOAD1(index); \ - SHA512ROUND(const, a, b, c, d, e, f, g, h) + SHA512ROUND(index, a, b, c, d, e, f, g, h) // A stack frame size of 128 bytes is required here, because // the frame size used for data expansion is 128 bytes. @@ -110,6 +112,8 @@ TEXT ·block(SB),NOSPLIT,$128-32 AND $~127, R6 BEQ R6, end + MOVV $·_K(SB), REG_KT // const table + // p_len >= 128 MOVV dig+0(FP), R4 ADDV R5, R6, R25 @@ -123,87 +127,87 @@ TEXT ·block(SB),NOSPLIT,$128-32 MOVV (7*8)(R4), R15 // h = H7 loop: - SHA512ROUND0( 0, 0x428a2f98d728ae22, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND0( 8, 0xd807aa98a3030242, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND0(15, 0xc19bf174cf692694, R9, R10, R11, R12, R13, R14, R15, R8) - - SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(23, 0x76f988da831153b5, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(24, 0x983e5152ee66dfab, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(31, 0x142929670a0e6e70, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(32, 0x27b70a8546d22ffc, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(39, 0x92722c851482353b, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(47, 0x106aa07032bbd1b8, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(56, 0x748f82ee5defb2fc, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(63, 0xc67178f2e372532b, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(64, 0xca273eceea26619c, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(71, 0x1b710b35131c471b, R9, R10, R11, R12, R13, R14, R15, R8) - SHA512ROUND1(72, 0x28db77f523047d84, R8, R9, R10, R11, R12, R13, R14, R15) - SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8, R9, R10, R11, R12, R13, R14) - SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8, R9, R10, R11, R12, R13) - SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8, R9, R10, R11, R12) - SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8, R9, R10, R11) - SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8, R9, R10) - SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8, R9) - SHA512ROUND1(79, 0x6c44198c4a475817, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND0( 0, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND0( 1, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND0( 2, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND0( 3, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND0( 4, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND0( 5, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND0( 6, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND0( 7, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND0( 8, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND0( 9, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND0(10, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND0(11, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND0(12, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND0(13, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND0(14, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND0(15, R9, R10, R11, R12, R13, R14, R15, R8) + + SHA512ROUND1(16, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(17, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(18, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(19, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(20, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(21, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(22, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(23, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(24, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(25, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(26, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(27, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(28, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(29, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(30, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(31, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(32, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(33, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(34, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(35, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(36, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(37, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(38, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(39, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(40, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(41, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(42, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(43, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(44, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(45, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(46, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(47, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(48, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(49, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(50, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(51, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(52, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(53, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(54, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(55, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(56, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(57, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(58, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(59, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(60, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(61, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(62, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(63, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(64, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(65, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(66, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(67, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(68, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(69, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(70, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(71, R9, R10, R11, R12, R13, R14, R15, R8) + SHA512ROUND1(72, R8, R9, R10, R11, R12, R13, R14, R15) + SHA512ROUND1(73, R15, R8, R9, R10, R11, R12, R13, R14) + SHA512ROUND1(74, R14, R15, R8, R9, R10, R11, R12, R13) + SHA512ROUND1(75, R13, R14, R15, R8, R9, R10, R11, R12) + SHA512ROUND1(76, R12, R13, R14, R15, R8, R9, R10, R11) + SHA512ROUND1(77, R11, R12, R13, R14, R15, R8, R9, R10) + SHA512ROUND1(78, R10, R11, R12, R13, R14, R15, R8, R9) + SHA512ROUND1(79, R9, R10, R11, R12, R13, R14, R15, R8) MOVV (0*8)(R4), REGTMP MOVV (1*8)(R4), REGTMP1