Cypherpunks repositories - gostls13.git/commitdiff
crypto/sha512: use const table for key loading on loong64
author Julian Zhu <jz531210@gmail.com>
Tue, 3 Jun 2025 17:11:15 +0000 (01:11 +0800)
committer abner chenc <chenguoqi@loongson.cn>
Wed, 6 Aug 2025 01:02:52 +0000 (18:02 -0700)
On loong64, load the round constants (keys) from a static in-memory table instead of materializing each one as an immediate in a register.

Benchmark for Loongson-3A5000:
goos: linux
goarch: loong64
pkg: crypto/sha512
cpu: Loongson-3A5000-HV @ 2500.00MHz
                    │   sha512o   │              sha512n            │
                    │   sec/op    │   sec/op     vs base            │
Hash8Bytes/New-4      489.1n ± 0%   464.7n ± 0%  -5.00% (p=0.000 n=8)
Hash8Bytes/Sum384-4   499.1n ± 0%   474.6n ± 0%  -4.92% (p=0.000 n=8)
Hash8Bytes/Sum512-4   506.6n ± 0%   481.9n ± 0%  -4.86% (p=0.000 n=8)
Hash1K/New-4          3.371µ ± 0%   3.152µ ± 0%  -6.51% (p=0.000 n=8)
Hash1K/Sum384-4       3.385µ ± 0%   3.164µ ± 0%  -6.53% (p=0.000 n=8)
Hash1K/Sum512-4       3.392µ ± 0%   3.170µ ± 0%  -6.54% (p=0.000 n=8)
Hash8K/New-4          23.62µ ± 0%   22.01µ ± 0%  -6.82% (p=0.000 n=8)
Hash8K/Sum384-4       23.63µ ± 0%   22.02µ ± 0%  -6.82% (p=0.000 n=8)
Hash8K/Sum512-4       23.64µ ± 0%   22.02µ ± 0%  -6.86% (p=0.000 n=8)
geomean               3.415µ        3.207µ       -6.10%

                    │   sha512o    │              sha512n            │
                    │     B/s      │     B/s       vs base           │
Hash8Bytes/New-4     15.60Mi ± 0%   16.42Mi ± 0%  +5.29% (p=0.000 n=8)
Hash8Bytes/Sum384-4  15.29Mi ± 0%   16.08Mi ± 0%  +5.18% (p=0.000 n=8)
Hash8Bytes/Sum512-4  15.06Mi ± 0%   15.83Mi ± 0%  +5.13% (p=0.000 n=8)
Hash1K/New-4         289.7Mi ± 0%   309.9Mi ± 0%  +6.97% (p=0.000 n=8)
Hash1K/Sum384-4      288.5Mi ± 0%   308.6Mi ± 0%  +6.97% (p=0.000 n=8)
Hash1K/Sum512-4      287.9Mi ± 0%   308.0Mi ± 0%  +7.00% (p=0.000 n=8)
Hash8K/New-4         330.8Mi ± 0%   355.0Mi ± 0%  +7.32% (p=0.000 n=8)
Hash8K/Sum384-4      330.6Mi ± 0%   354.9Mi ± 0%  +7.32% (p=0.000 n=8)
Hash8K/Sum512-4      330.5Mi ± 0%   354.8Mi ± 0%  +7.36% (p=0.000 n=8)
geomean              113.5Mi        120.9Mi       +6.50%

Benchmark for Loongson-3A6000:
goos: linux
goarch: loong64
pkg: crypto/sha512
cpu: Loongson-3A6000 @ 2500.00MHz
                    │ sha512.old  │             sha512.new           │
                    │   sec/op    │   sec/op     vs base             │
Hash8Bytes/New-8      397.2n ± 0%   380.6n ± 0%  -4.17% (p=0.000 n=10)
Hash8Bytes/Sum384-8   406.1n ± 0%   397.9n ± 0%  -2.02% (p=0.000 n=10)
Hash8Bytes/Sum512-8   410.1n ± 0%   395.8n ± 1%  -3.50% (p=0.000 n=10)
Hash1K/New-8          2.932µ ± 0%   2.800µ ± 0%  -4.50% (p=0.000 n=10)
Hash1K/Sum384-8       2.941µ ± 0%   2.812µ ± 0%  -4.39% (p=0.000 n=10)
Hash1K/Sum512-8       2.947µ ± 0%   2.814µ ± 0%  -4.50% (p=0.000 n=10)
Hash8K/New-8          20.68µ ± 0%   19.73µ ± 1%  -4.58% (p=0.000 n=10)
Hash8K/Sum384-8       20.69µ ± 0%   19.73µ ± 0%  -4.62% (p=0.000 n=10)
Hash8K/Sum512-8       20.70µ ± 0%   19.75µ ± 0%  -4.60% (p=0.000 n=10)
geomean               2.908µ        2.789µ       -4.10%

                    │  sha512.old  │             sha512.new          │
                    │     B/s      │     B/s       vs base           │
Hash8Bytes/New-8    19.21Mi ± 0%   20.05Mi ± 0%  +4.37% (p=0.000 n=10)
Hash8Bytes/Sum384-8 18.79Mi ± 0%   19.18Mi ± 0%  +2.08% (p=0.000 n=10)
Hash8Bytes/Sum512-8 18.60Mi ± 0%   19.28Mi ± 1%  +3.64% (p=0.000 n=10)
Hash1K/New-8        333.1Mi ± 0%   348.8Mi ± 0%  +4.71% (p=0.000 n=10)
Hash1K/Sum384-8     332.0Mi ± 0%   347.3Mi ± 0%  +4.60% (p=0.000 n=10)
Hash1K/Sum512-8     331.5Mi ± 0%   347.0Mi ± 0%  +4.69% (p=0.000 n=10)
Hash8K/New-8        377.8Mi ± 0%   396.0Mi ± 1%  +4.80% (p=0.000 n=10)
Hash8K/Sum384-8     377.7Mi ± 0%   396.0Mi ± 0%  +4.85% (p=0.000 n=10)
Hash8K/Sum512-8     377.5Mi ± 0%   395.7Mi ± 0%  +4.82% (p=0.000 n=10)
geomean             133.3Mi        139.0Mi       +4.28%

Change-Id: I55ae4a8e4b0c51a98583f654158235fe738cf348
Reviewed-on: https://go-review.googlesource.com/c/go/+/678436
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Mark Freeman <markfreeman@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
src/crypto/internal/fips140/sha512/sha512block_loong64.s

index f65d563ca34d824c9c52e559c75cd434a15ba7a0..751ab4e4f696e720a3a79456b5c4bd5d1c49350a 100644 (file)
@@ -14,6 +14,7 @@
 #define REGTMP3        R18
 #define REGTMP4        R7
 #define REGTMP5        R6
+#define REG_KT R19
 
 // W[i] = M[i]; for 0 <= i <= 15
 #define LOAD0(index) \
@@ -52,8 +53,9 @@
 //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
 //                 = ((y XOR z) AND x) XOR z
 // Calculate T1 in REGTMP4
-#define SHA512T1(const, e, f, g, h) \
-       ADDV    $const, h; \
+#define SHA512T1(index, e, f, g, h) \
+       MOVV    (index*8)(REG_KT), REGTMP5; \
+       ADDV    REGTMP5, h; \
        ADDV    REGTMP4, h; \
        ROTRV   $14, e, REGTMP5; \
        ROTRV   $18, e, REGTMP; \
 
 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
 // The values for e and a are stored in d and h, ready for rotation.
-#define SHA512ROUND(const, a, b, c, d, e, f, g, h) \
-       SHA512T1(const, e, f, g, h); \
+#define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
+       SHA512T1(index, e, f, g, h); \
        SHA512T2(a, b, c); \
        ADDV    REGTMP4, d; \
        ADDV    REGTMP1, REGTMP4, h
 
-#define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
+#define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \
        LOAD0(index); \
-       SHA512ROUND(const, a, b, c, d, e, f, g, h)
+       SHA512ROUND(index, a, b, c, d, e, f, g, h)
 
-#define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
+#define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \
        LOAD1(index); \
-       SHA512ROUND(const, a, b, c, d, e, f, g, h)
+       SHA512ROUND(index, a, b, c, d, e, f, g, h)
 
 // A stack frame size of 128 bytes is required here, because
 // the frame size used for data expansion is 128 bytes.
@@ -110,6 +112,8 @@ TEXT ·block(SB),NOSPLIT,$128-32
        AND     $~127, R6
        BEQ     R6, end
 
+       MOVV    $·_K(SB), REG_KT               // const table
+
        // p_len >= 128
        MOVV    dig+0(FP), R4
        ADDV    R5, R6, R25
@@ -123,87 +127,87 @@ TEXT ·block(SB),NOSPLIT,$128-32
        MOVV    (7*8)(R4), R15  // h = H7
 
 loop:
-       SHA512ROUND0( 0, 0x428a2f98d728ae22, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND0( 8, 0xd807aa98a3030242, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND0(15, 0xc19bf174cf692694, R9,  R10, R11, R12, R13, R14, R15, R8)
-
-       SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(23, 0x76f988da831153b5, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(24, 0x983e5152ee66dfab, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(31, 0x142929670a0e6e70, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(32, 0x27b70a8546d22ffc, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(39, 0x92722c851482353b, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(47, 0x106aa07032bbd1b8, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(56, 0x748f82ee5defb2fc, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(63, 0xc67178f2e372532b, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(64, 0xca273eceea26619c, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(71, 0x1b710b35131c471b, R9,  R10, R11, R12, R13, R14, R15, R8)
-       SHA512ROUND1(72, 0x28db77f523047d84, R8,  R9,  R10, R11, R12, R13, R14, R15)
-       SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8,  R9,  R10, R11, R12, R13, R14)
-       SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8,  R9,  R10, R11, R12, R13)
-       SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8,  R9,  R10, R11, R12)
-       SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8,  R9,  R10, R11)
-       SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8,  R9,  R10)
-       SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8,  R9)
-       SHA512ROUND1(79, 0x6c44198c4a475817, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND0( 0, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND0( 1, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND0( 2, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND0( 3, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND0( 4, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND0( 5, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND0( 6, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND0( 7, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND0( 8, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND0( 9, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND0(10, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND0(11, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND0(12, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND0(13, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND0(14, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND0(15, R9,  R10, R11, R12, R13, R14, R15, R8)
+
+       SHA512ROUND1(16, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(17, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(18, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(19, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(20, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(21, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(22, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(23, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(24, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(25, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(26, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(27, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(28, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(29, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(30, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(31, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(32, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(33, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(34, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(35, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(36, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(37, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(38, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(39, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(40, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(41, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(42, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(43, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(44, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(45, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(46, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(47, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(48, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(49, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(50, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(51, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(52, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(53, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(54, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(55, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(56, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(57, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(58, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(59, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(60, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(61, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(62, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(63, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(64, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(65, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(66, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(67, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(68, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(69, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(70, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(71, R9,  R10, R11, R12, R13, R14, R15, R8)
+       SHA512ROUND1(72, R8,  R9,  R10, R11, R12, R13, R14, R15)
+       SHA512ROUND1(73, R15, R8,  R9,  R10, R11, R12, R13, R14)
+       SHA512ROUND1(74, R14, R15, R8,  R9,  R10, R11, R12, R13)
+       SHA512ROUND1(75, R13, R14, R15, R8,  R9,  R10, R11, R12)
+       SHA512ROUND1(76, R12, R13, R14, R15, R8,  R9,  R10, R11)
+       SHA512ROUND1(77, R11, R12, R13, R14, R15, R8,  R9,  R10)
+       SHA512ROUND1(78, R10, R11, R12, R13, R14, R15, R8,  R9)
+       SHA512ROUND1(79, R9,  R10, R11, R12, R13, R14, R15, R8)
 
        MOVV    (0*8)(R4), REGTMP
        MOVV    (1*8)(R4), REGTMP1