--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ . "github.com/mmcloughlin/avo/build"
+ . "github.com/mmcloughlin/avo/operand"
+ . "github.com/mmcloughlin/avo/reg"
+)
+
+//go:generate go run . -out ../sha512block_amd64.s -pkg sha512
+
+// SHA512 block routine. See sha512block.go for Go equivalent.
+//
+// The algorithm is detailed in FIPS 180-4:
+//
+// https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
+//
+// Wt = Mt; for 0 <= t <= 15
+// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
+//
+// a = H0
+// b = H1
+// c = H2
+// d = H3
+// e = H4
+// f = H5
+// g = H6
+// h = H7
+//
+// for t = 0 to 79 {
+// T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
+// T2 = BIGSIGMA0(a) + Maj(a,b,c)
+// h = g
+// g = f
+// f = e
+// e = d + T1
+// d = c
+// c = b
+// b = a
+// a = T1 + T2
+// }
+//
+// H0 = a + H0
+// H1 = b + H1
+// H2 = c + H2
+// H3 = d + H3
+// H4 = e + H4
+// H5 = f + H5
+// H6 = g + H6
+// H7 = h + H7
+
+const ThatPeskyUnicodeDot = "\u00b7" // U+00B7 (middle dot); loop0 prepends it to "_K" to build the name of the symbol it loads into BP
+
+var _K = []uint64{ // the 80 per-round constants Kt; loop() passes _K[i] as the konst argument of round i
+ 0x428a2f98d728ae22,
+ 0x7137449123ef65cd,
+ 0xb5c0fbcfec4d3b2f,
+ 0xe9b5dba58189dbbc,
+ 0x3956c25bf348b538,
+ 0x59f111f1b605d019,
+ 0x923f82a4af194f9b,
+ 0xab1c5ed5da6d8118,
+ 0xd807aa98a3030242,
+ 0x12835b0145706fbe,
+ 0x243185be4ee4b28c,
+ 0x550c7dc3d5ffb4e2,
+ 0x72be5d74f27b896f,
+ 0x80deb1fe3b1696b1,
+ 0x9bdc06a725c71235,
+ 0xc19bf174cf692694,
+ 0xe49b69c19ef14ad2,
+ 0xefbe4786384f25e3,
+ 0x0fc19dc68b8cd5b5,
+ 0x240ca1cc77ac9c65,
+ 0x2de92c6f592b0275,
+ 0x4a7484aa6ea6e483,
+ 0x5cb0a9dcbd41fbd4,
+ 0x76f988da831153b5,
+ 0x983e5152ee66dfab,
+ 0xa831c66d2db43210,
+ 0xb00327c898fb213f,
+ 0xbf597fc7beef0ee4,
+ 0xc6e00bf33da88fc2,
+ 0xd5a79147930aa725,
+ 0x06ca6351e003826f,
+ 0x142929670a0e6e70,
+ 0x27b70a8546d22ffc,
+ 0x2e1b21385c26c926,
+ 0x4d2c6dfc5ac42aed,
+ 0x53380d139d95b3df,
+ 0x650a73548baf63de,
+ 0x766a0abb3c77b2a8,
+ 0x81c2c92e47edaee6,
+ 0x92722c851482353b,
+ 0xa2bfe8a14cf10364,
+ 0xa81a664bbc423001,
+ 0xc24b8b70d0f89791,
+ 0xc76c51a30654be30,
+ 0xd192e819d6ef5218,
+ 0xd69906245565a910,
+ 0xf40e35855771202a,
+ 0x106aa07032bbd1b8,
+ 0x19a4c116b8d2d0c8,
+ 0x1e376c085141ab53,
+ 0x2748774cdf8eeb99,
+ 0x34b0bcb5e19b48a8,
+ 0x391c0cb3c5c95a63,
+ 0x4ed8aa4ae3418acb,
+ 0x5b9cca4f7763e373,
+ 0x682e6ff3d6b2b8a3,
+ 0x748f82ee5defb2fc,
+ 0x78a5636f43172f60,
+ 0x84c87814a1f0ab72,
+ 0x8cc702081a6439ec,
+ 0x90befffa23631e28,
+ 0xa4506cebde82bde9,
+ 0xbef9a3f7b2c67915,
+ 0xc67178f2e372532b,
+ 0xca273eceea26619c,
+ 0xd186b8c721c0c207,
+ 0xeada7dd6cde0eb1e,
+ 0xf57d4f7fee6ed178,
+ 0x06f067aa72176fba,
+ 0x0a637dc5a2c898a6,
+ 0x113f9804bef90dae,
+ 0x1b710b35131c471b,
+ 0x28db77f523047d84,
+ 0x32caab7b40c72493,
+ 0x3c9ebe0a15c9bebc,
+ 0x431d67c49c100d4c,
+ 0x4cc5d4becb3e42b6,
+ 0x597f299cfc657e2a,
+ 0x5fcb6fab3ad6faec,
+ 0x6c44198c4a475817,
+}
+// main is the generator entry point: it declares the target package and build constraint, emits both block routines, and calls Generate.
+func main() {
+ Package("crypto/sha512")
+ ConstraintExpr("!purego") // the generated file carries "//go:build !purego"
+ blockAMD64() // emitted first: the routine built from SHA512ROUND0/SHA512ROUND1
+ blockAVX2() // emitted second: the routine using Y registers and RORXQ
+ Generate()
+}
+
+// MSGSCHEDULE0 emits the message-schedule step for the first sixteen rounds:
+//
+//	Wt = Mt; for 0 <= t <= 15
+//
+// The qword at index*8(SI) is loaded into AX, byte-swapped, and stored at
+// index*8(BP). The swapped word is left in AX.
+//
+// Line 50
+func MSGSCHEDULE0(index int) {
+	byteOffset := index * 8
+	input := Mem{Base: SI}.Offset(byteOffset)
+	schedule := Mem{Base: BP}.Offset(byteOffset)
+
+	MOVQ(input, RAX)
+	BSWAPQ(RAX)
+	MOVQ(RAX, schedule)
+}
+
+// MSGSCHEDULE1 emits the code that computes Wt for 16 <= t <= 79 (t = index) and stores it at index*8(BP):
+// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16
+// SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
+// SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
+// It uses AX, BX, CX and DX as scratch registers; the new Wt is also left in AX.
+// Originally annotated "Line 58" (presumably a line number in the hand-written assembly this generator replaces).
+func MSGSCHEDULE1(index int) {
+ MOVQ(Mem{Base: BP}.Offset((index-2)*8), RAX) // AX = Wt-2
+ MOVQ(RAX, RCX)
+ RORQ(Imm(19), RAX) // AX = ROTR(19, Wt-2)
+ MOVQ(RCX, RDX)
+ RORQ(Imm(61), RCX) // CX = ROTR(61, Wt-2)
+ SHRQ(Imm(6), RDX) // DX = SHR(6, Wt-2)
+ MOVQ(Mem{Base: BP}.Offset((index-15)*8), RBX) // BX = Wt-15
+ XORQ(RCX, RAX)
+ MOVQ(RBX, RCX)
+ XORQ(RDX, RAX) // AX = SIGMA1(Wt-2)
+ RORQ(Imm(1), RBX) // BX = ROTR(1, Wt-15)
+ MOVQ(RCX, RDX)
+ SHRQ(Imm(7), RDX) // DX = SHR(7, Wt-15)
+ RORQ(Imm(8), RCX) // CX = ROTR(8, Wt-15)
+ ADDQ(Mem{Base: BP}.Offset((index-7)*8), RAX) // AX += Wt-7
+ XORQ(RCX, RBX)
+ XORQ(RDX, RBX) // BX = SIGMA0(Wt-15)
+ ADDQ(Mem{Base: BP}.Offset((index-16)*8), RBX) // BX += Wt-16
+ ADDQ(RBX, RAX) // AX = Wt
+ MOVQ(RAX, Mem{Base: BP}.Offset((index)*8))
+}
+
+// SHA512T1 emits the code that calculates T1 in AX - it uses the AX, CX and DX registers.
+// The register passed as h is also used as an accumulator and is overwritten. Wt is passed in AX; konst is Kt.
+//
+// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
+// BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
+// Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
+//
+// Originally annotated "Line 85" (presumably a line number in the hand-written assembly this generator replaces).
+func SHA512T1(konst uint64, e, f, g, h GPPhysical) {
+ MOVQ(U64(konst), RDX) // DX = Kt
+ ADDQ(RAX, h) // h += Wt
+ MOVQ(e, RAX)
+ ADDQ(RDX, h) // h += Kt
+ MOVQ(e, RCX)
+ RORQ(U8(14), RAX) // AX = ROTR(14, e)
+ MOVQ(e, RDX)
+ RORQ(U8(18), RCX) // CX = ROTR(18, e)
+ XORQ(RCX, RAX)
+ MOVQ(e, RCX)
+ RORQ(U8(41), RDX) // DX = ROTR(41, e)
+ ANDQ(f, RCX) // CX = e AND f
+ XORQ(RAX, RDX) // DX = BIGSIGMA1(e)
+ MOVQ(e, RAX)
+ NOTQ(RAX)
+ ADDQ(RDX, h) // h += BIGSIGMA1(e)
+ ANDQ(g, RAX) // AX = NOT e AND g
+ XORQ(RCX, RAX) // AX = Ch(e, f, g)
+ ADDQ(h, RAX) // AX = T1
+}
+
+// SHA512T2 emits the code that calculates T2 in BX - it uses the BX, CX, DX and DI registers; a, b and c are only read.
+//
+// T2 = BIGSIGMA0(a) + Maj(a, b, c)
+// BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
+// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
+//
+// Originally annotated "Line 110" (presumably a line number in the hand-written assembly this generator replaces).
+func SHA512T2(a, b, c GPPhysical) {
+ MOVQ(a, RDI)
+ MOVQ(c, RBX)
+ RORQ(Imm(28), RDI) // DI = ROTR(28, a)
+ MOVQ(a, RDX)
+ ANDQ(b, RBX) // BX = b AND c
+ RORQ(Imm(34), RDX) // DX = ROTR(34, a)
+ MOVQ(a, RCX)
+ ANDQ(c, RCX) // CX = a AND c
+ XORQ(RDX, RDI) // DI = ROTR(28, a) XOR ROTR(34, a)
+ XORQ(RCX, RBX) // BX = (b AND c) XOR (a AND c)
+ MOVQ(a, RDX)
+ MOVQ(b, RCX)
+ RORQ(Imm(39), RDX) // DX = ROTR(39, a)
+ ANDQ(a, RCX) // CX = a AND b
+ XORQ(RCX, RBX) // BX = Maj(a, b, c)
+ XORQ(RDX, RDI) // DI = BIGSIGMA0(a)
+ ADDQ(RDI, RBX) // BX = T2
+}
+
+// SHA512ROUND emits one round: it calculates T1 and T2, then e = d + T1 and a = T1 + T2.
+// The values for e and a are stored in d and h, ready for rotation. Wt must already be in AX; index is not used here.
+//
+// Originally annotated "Line 131" (presumably a line number in the hand-written assembly this generator replaces).
+func SHA512ROUND(index int, konst uint64, a, b, c, d, e, f, g, h GPPhysical) {
+ SHA512T1(konst, e, f, g, h) // AX = T1
+ SHA512T2(a, b, c) // BX = T2
+ MOVQ(RBX, h) // h = T2
+ ADDQ(RAX, d) // d = d + T1 (the next e)
+ ADDQ(RAX, h) // h = T1 + T2 (the next a)
+}
+
+// SHA512ROUND0 emits a round whose Wt comes straight from the input: MSGSCHEDULE0 leaves Wt in AX, then SHA512ROUND consumes it; loop() uses it for rounds 0-15 (originally annotated "Line 169").
+func SHA512ROUND0(index int, konst uint64, a, b, c, d, e, f, g, h GPPhysical) {
+ MSGSCHEDULE0(index)
+ SHA512ROUND(index, konst, a, b, c, d, e, f, g, h)
+}
+
+// SHA512ROUND1 emits a round whose Wt is derived from earlier schedule words: MSGSCHEDULE1 leaves Wt in AX, then SHA512ROUND consumes it; loop() uses it for rounds 16-79 (originally annotated "Line 142").
+func SHA512ROUND1(index int, konst uint64, a, b, c, d, e, f, g, h GPPhysical) {
+ MSGSCHEDULE1(index)
+ SHA512ROUND(index, konst, a, b, c, d, e, f, g, h)
+}
+
+// blockAMD64 emits the body of the blockAMD64 routine: prologue, then the code from loop() and end() (originally annotated "Line 146", presumably a line number in the hand-written assembly this generator replaces).
+func blockAMD64() {
+ Implement("blockAMD64")
+ AllocLocal(648) // loop() stores 80 schedule qwords at 0..639(SP); 640(SP) holds the end-of-input pointer
+
+ Load(Param("p").Base(), RSI)
+ Load(Param("p").Len(), RDX)
+ SHRQ(Imm(7), RDX) // shift right then left by 7: round the length down to a multiple of 128 bytes
+ SHLQ(Imm(7), RDX)
+
+ LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI) // DI = p base + rounded length
+ MOVQ(RDI, Mem{Base: SP}.Offset(640))
+ CMPQ(RSI, RDI)
+ JEQ(LabelRef("end")) // start == end: no complete block to process
+
+ Load(Param("dig"), RBP)
+ MOVQ(Mem{Base: BP}.Offset(0*8), R8) // a = H0
+ MOVQ(Mem{Base: BP}.Offset(1*8), R9) // b = H1
+ MOVQ(Mem{Base: BP}.Offset(2*8), R10) // c = H2
+ MOVQ(Mem{Base: BP}.Offset(3*8), R11) // d = H3
+ MOVQ(Mem{Base: BP}.Offset(4*8), R12) // e = H4
+ MOVQ(Mem{Base: BP}.Offset(5*8), R13) // f = H5
+ MOVQ(Mem{Base: BP}.Offset(6*8), R14) // g = H6
+ MOVQ(Mem{Base: BP}.Offset(7*8), R15) // h = H7
+ PSHUFFLE_BYTE_FLIP_MASK_DATA() // result unused here; NOTE(review): presumably called only to declare the data symbol at this point of the output - confirm
+ loop()
+ end()
+}
+
+// rotateRight returns a new slice holding the elements of *slice moved one
+// position towards the end, with the last element wrapping round to index 0.
+// The slice that the argument points to is not modified.
+func rotateRight(slice *[]GPPhysical) []GPPhysical {
+	src := *slice
+	rotated := make([]GPPhysical, len(src))
+	for i := range src {
+		rotated[(i+1)%len(src)] = src[i]
+	}
+	return rotated
+}
+
+// Line 167
+func loop() {
+ Label("loop")
+ MOVQ(RSP, RBP) // message schedule
+
+ n := len(_K)
+ regs := []GPPhysical{R8, R9, R10, R11, R12, R13, R14, R15}
+
+ for i := 0; i < 16; i++ {
+ SHA512ROUND0(i, _K[i], regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7])
+ regs = rotateRight(®s)
+ }
+
+ for i := 16; i < n; i++ {
+ SHA512ROUND1(i, _K[i], regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7])
+ regs = rotateRight(®s)
+ }
+
+ Load(Param("dig"), RBP)
+
+ BP_Mem := Mem{Base: BP}
+ ADDQ(BP_Mem.Offset(0*8), R8) // H0 = a + H0
+ MOVQ(R8, BP_Mem.Offset(0*8))
+ ADDQ(BP_Mem.Offset(1*8), R9) // H1 = b + H1
+ MOVQ(R9, BP_Mem.Offset(1*8))
+ ADDQ(BP_Mem.Offset(2*8), R10) // H2 = c + H2
+ MOVQ(R10, BP_Mem.Offset(2*8))
+ ADDQ(BP_Mem.Offset(3*8), R11) // H3 = d + H3
+ MOVQ(R11, BP_Mem.Offset(3*8))
+ ADDQ(BP_Mem.Offset(4*8), R12) // H4 = e + H4
+ MOVQ(R12, BP_Mem.Offset(4*8))
+ ADDQ(BP_Mem.Offset(5*8), R13) // H5 = f + H5
+ MOVQ(R13, BP_Mem.Offset(5*8))
+ ADDQ(BP_Mem.Offset(6*8), R14) // H6 = g + H6
+ MOVQ(R14, BP_Mem.Offset(6*8))
+ ADDQ(BP_Mem.Offset(7*8), R15) // H7 = h + H7
+ MOVQ(R15, BP_Mem.Offset(7*8))
+
+ ADDQ(Imm(128), RSI)
+ CMPQ(RSI, Mem{Base: SP}.Offset(640))
+ JB(LabelRef("loop"))
+}
+
+// end emits the "end" label, the target of the JEQ in blockAMD64, followed by RET (originally annotated "Line 274").
+func end() {
+ Label("end")
+ RET()
+}
+
+// Version below is based on "Fast SHA512 Implementations on Intel
+// Architecture Processors" White-paper
+// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-sha512-implementations-ia-processors-paper.pdf
+// AVX2 version by Intel, same algorithm in Linux kernel:
+// https://github.com/torvalds/linux/blob/master/arch/x86/crypto/sha512-avx2-asm.S
+
+// James Guilford <james.guilford@intel.com>
+// Kirk Yap <kirk.s.yap@intel.com>
+// Tim Chen <tim.c.chen@linux.intel.com>
+// David Cote <david.m.cote@intel.com>
+// Aleksey Sidorov <aleksey.sidorov@intel.com>
+
+// Stack frame layout used by the blockAVX2 code, as byte offsets from SP.
+// The values are fixed at compile time, so they are declared as constants
+// rather than mutable package-level variables.
+//
+// Line 289
+const (
+	YFER_SIZE int = 4 * 8 // one 32-byte Y register spilled for the scalar rounds
+	SRND_SIZE     = 1 * 8 // loop counter
+	INP_SIZE      = 1 * 8 // saved input pointer
+
+	frame_YFER   = 0
+	frame_SRND   = frame_YFER + YFER_SIZE
+	frame_INP    = frame_SRND + SRND_SIZE
+	frame_INPEND = frame_INP + INP_SIZE // saved end-of-input pointer, 8 bytes
+)
+
+// addm emits p2 += p1 followed by a store of p2 back to p1, so both the register and the memory word end up holding the sum (originally annotated "Line 298").
+func addm(p1 Mem, p2 GPPhysical) {
+ ADDQ(p1, p2)
+ MOVQ(p2, p1)
+}
+
+// COPY_YMM_AND_BSWAP emits an unaligned load of p2 into p1 followed by a VPSHUFB of p1 with p3 as the shuffle control; callers pass the byte-flip mask as p3 (originally annotated "Line 302").
+func COPY_YMM_AND_BSWAP(p1 VecPhysical, p2 Mem, p3 VecPhysical) {
+ VMOVDQU(p2, p1)
+ VPSHUFB(p3, p1, p1)
+}
+
+// MY_VPALIGNR emits VPERM2F128 (selector 0x3) of YSRC2/YSRC1 into YDST, then VPALIGNR by RVAL bytes of YSRC2 and YDST into YDST; NOTE(review): presumably a 256-bit-wide align of YSRC1:YSRC2 - confirm (originally annotated "Line 306").
+func MY_VPALIGNR(YDST, YSRC1, YSRC2 VecPhysical, RVAL int) {
+ VPERM2F128(U8(0x3), YSRC2, YSRC1, YDST)
+ VPALIGNR(U8(RVAL), YSRC2, YDST, YDST)
+}
+
+// blockAVX2 emits the body of the blockAVX2 routine: prologue, then the code from loop0, loop1, loop2 and done_hash in that order (originally annotated "Line 324").
+func blockAVX2() {
+ Implement("blockAVX2")
+ Attributes(NOSPLIT)
+ AllocLocal(56) // frame_INPEND (48) plus its 8-byte slot
+
+ Load(Param("dig"), RSI)
+ Load(Param("p").Base(), RDI)
+ Load(Param("p").Len(), RDX)
+
+ SHRQ(Imm(7), RDX) // shift right then left by 7: round the length down to a multiple of 128 bytes
+ SHLQ(Imm(7), RDX)
+
+ JZ(LabelRef("done_hash")) // rounded length is zero: nothing to hash
+ ADDQ(RDI, RDX) // DX = end of the input to process
+ MOVQ(RDX, Mem{Base: SP}.Offset(frame_INPEND))
+
+ MOVQ(Mem{Base: SI}.Offset(0*8), RAX) // digest word 0 (a)
+ MOVQ(Mem{Base: SI}.Offset(1*8), RBX) // digest word 1 (b)
+ MOVQ(Mem{Base: SI}.Offset(2*8), RCX) // digest word 2 (c)
+ MOVQ(Mem{Base: SI}.Offset(3*8), R8) // digest word 3 (d)
+ MOVQ(Mem{Base: SI}.Offset(4*8), RDX) // digest word 4 (e)
+ MOVQ(Mem{Base: SI}.Offset(5*8), R9) // digest word 5 (f)
+ MOVQ(Mem{Base: SI}.Offset(6*8), R10) // digest word 6 (g)
+ MOVQ(Mem{Base: SI}.Offset(7*8), R11) // digest word 7 (h)
+
+ PSHUFFLE_BYTE_FLIP_MASK := PSHUFFLE_BYTE_FLIP_MASK_DATA()
+ VMOVDQU(PSHUFFLE_BYTE_FLIP_MASK, Y9) // Y9 = shuffle control passed to COPY_YMM_AND_BSWAP in loop0
+
+ loop0()
+ loop1()
+ loop2()
+ done_hash()
+}
+
+// loop0 emits the per-block setup of blockAVX2: it loads BP from the "·_K" symbol, loads and byte-shuffles one 128-byte block into Y4-Y7, saves the input pointer and sets the loop1 counter (originally annotated "Line 347").
+func loop0() {
+ Label("loop0")
+
+ _K := NewDataAddr(Symbol{Name: ThatPeskyUnicodeDot + "_K"}, 0) // operand for symbol "·_K" at offset 0; shadows the package-level _K slice
+ MOVQ(_K, RBP) // loop1/loop2 use BP as the base of the round constants; NOTE(review): the symbol is not defined in this file - confirm it is the Go-side table
+
+ // load the block as four 32-byte vectors (16 64-bit words), shuffling the bytes of each with the mask in Y9
+ COPY_YMM_AND_BSWAP(Y4, Mem{Base: DI}.Offset(0*32), Y9)
+ COPY_YMM_AND_BSWAP(Y5, Mem{Base: DI}.Offset(1*32), Y9)
+ COPY_YMM_AND_BSWAP(Y6, Mem{Base: DI}.Offset(2*32), Y9)
+ COPY_YMM_AND_BSWAP(Y7, Mem{Base: DI}.Offset(3*32), Y9)
+
+ MOVQ(RDI, Mem{Base: SP}.Offset(frame_INP))
+
+ // loop1 counter: 4 iterations; each loop1 iteration emits 16 rounds and recomputes Y4-Y7 (the original comment read "schedule 64 input dwords, by doing 12 rounds of 4 each")
+ MOVQ(U32(4), Mem{Base: SP}.Offset(frame_SRND))
+}
+
+// loop1 emits the "loop1" body of blockAVX2: four groups (one per vector Y4..Y7), each adding round constants to a schedule vector, spilling it to frame_YFER and running four scalar rounds interleaved with the vector computation of the next schedule words; it repeats frame_SRND times (originally annotated "Line 361").
+func loop1() {
+ Label("loop1")
+ VPADDQ(Mem{Base: BP}, Y4, Y0) // group 1: Y0 = Y4 + the 32 bytes at BP (schedule words plus round constants)
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER)) // spill so each scalar round can add one qword from the stack
+
+ MY_VPALIGNR(Y0, Y7, Y6, 8) // NOTE(review): presumably the Wt-7 terms - confirm
+
+ VPADDQ(Y4, Y0, Y0)
+
+ MY_VPALIGNR(Y1, Y5, Y4, 8) // NOTE(review): presumably the Wt-15 terms - confirm
+
+ VPSRLQ(Imm(1), Y1, Y2)
+ VPSLLQ(Imm(64-1), Y1, Y3)
+ VPOR(Y2, Y3, Y3) // Y3 = ROTR(1, Y1) per 64-bit lane
+
+ VPSRLQ(Imm(7), Y1, Y8) // Y8 = SHR(7, Y1)
+
+ MOVQ(RAX, RDI) // round 1 of 16: a=RAX b=RBX c=RCX d=R8 e=RDX f=R9 g=R10 h=R11
+ RORXQ(Imm(41), RDX, R13)
+ RORXQ(Imm(18), RDX, R14)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R11) // h += spilled qword 0
+ ORQ(RCX, RDI) // DI = a OR c
+ MOVQ(R9, R15)
+ RORXQ(Imm(34), RAX, R12)
+
+ XORQ(R14, R13)
+ XORQ(R10, R15) // R15 = f XOR g
+ RORXQ(Imm(14), RDX, R14)
+
+ ANDQ(RDX, R15)
+ XORQ(R14, R13) // R13 = ROTR(41,e) XOR ROTR(18,e) XOR ROTR(14,e)
+ RORXQ(Imm(39), RAX, R14)
+ ADDQ(R11, R8) // d += h
+
+ ANDQ(RBX, RDI) // DI = (a OR c) AND b
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RAX, R12)
+
+ XORQ(R10, R15) // R15 = ((f XOR g) AND e) XOR g
+ XORQ(R12, R14) // R14 = ROTR(39,a) XOR ROTR(34,a) XOR ROTR(28,a)
+ MOVQ(RAX, R12)
+ ANDQ(RCX, R12)
+
+ ADDQ(R13, R15)
+ ORQ(R12, RDI) // DI = ((a OR c) AND b) OR (a AND c)
+ ADDQ(R14, R11)
+
+ ADDQ(R15, R8) // d now holds the next e
+
+ ADDQ(R15, R11)
+ ADDQ(RDI, R11) // h now holds the next a
+
+ VPSRLQ(Imm(8), Y1, Y2)
+ VPSLLQ(Imm(64-8), Y1, Y1)
+ VPOR(Y2, Y1, Y1) // Y1 = ROTR(8, Y1)
+
+ VPXOR(Y8, Y3, Y3)
+ VPXOR(Y1, Y3, Y1) // Y1 = ROTR(1) XOR ROTR(8) XOR SHR(7), i.e. the SIGMA0 form
+
+ VPADDQ(Y1, Y0, Y0)
+
+ VPERM2F128(Imm(0x0), Y0, Y0, Y4)
+
+ MASK_YMM_LO := MASK_YMM_LO_DATA() // mask with the low 16 bytes zero and the high 16 bytes all ones
+ VPAND(MASK_YMM_LO, Y0, Y0)
+
+ VPERM2F128(Imm(0x11), Y7, Y7, Y2)
+ VPSRLQ(Imm(6), Y2, Y8) // Y8 = SHR(6, Y2); the rotations by 19 and 61 are XORed in below (SIGMA1 form)
+
+ MOVQ(R11, RDI) // round 2 of 16: a=R11 b=RAX c=RBX d=RCX e=R8 f=RDX g=R9 h=R10
+ RORXQ(Imm(41), R8, R13)
+ RORXQ(Imm(18), R8, R14)
+ ADDQ(Mem{Base: SP}.Offset(1*8+frame_YFER), R10)
+ ORQ(RBX, RDI)
+
+ MOVQ(RDX, R15)
+ RORXQ(Imm(34), R11, R12)
+ XORQ(R14, R13)
+ XORQ(R9, R15)
+
+ RORXQ(Imm(14), R8, R14)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R11, R14)
+ ANDQ(R8, R15)
+ ADDQ(R10, RCX)
+
+ ANDQ(RAX, RDI)
+ XORQ(R12, R14)
+
+ RORXQ(Imm(28), R11, R12)
+ XORQ(R9, R15)
+
+ XORQ(R12, R14)
+ MOVQ(R11, R12)
+ ANDQ(RBX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, R10)
+
+ ADDQ(R15, RCX)
+ ADDQ(R15, R10)
+ ADDQ(RDI, R10)
+
+ VPSRLQ(Imm(19), Y2, Y3)
+ VPSLLQ(Imm(64-19), Y2, Y1)
+ VPOR(Y1, Y3, Y3) // Y3 = ROTR(19, Y2)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y2, Y3)
+ VPSLLQ(Imm(64-61), Y2, Y1)
+ VPOR(Y1, Y3, Y3) // Y3 = ROTR(61, Y2)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y4, Y4)
+
+ VPSRLQ(Imm(6), Y4, Y8)
+
+ MOVQ(R10, RDI) // round 3 of 16: a=R10 b=R11 c=RAX d=RBX e=RCX f=R8 g=RDX h=R9
+ RORXQ(Imm(41), RCX, R13)
+ ADDQ(Mem{Base: SP}.Offset(2*8+frame_YFER), R9)
+
+ RORXQ(Imm(18), RCX, R14)
+ ORQ(RAX, RDI)
+ MOVQ(R8, R15)
+ XORQ(RDX, R15)
+
+ RORXQ(Imm(34), R10, R12)
+ XORQ(R14, R13)
+ ANDQ(RCX, R15)
+
+ RORXQ(Imm(14), RCX, R14)
+ ADDQ(R9, RBX)
+ ANDQ(R11, RDI)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R10, R14)
+ XORQ(RDX, R15)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R10, R12)
+
+ XORQ(R12, R14)
+ MOVQ(R10, R12)
+ ANDQ(RAX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, R9)
+ ADDQ(R15, RBX)
+ ADDQ(R15, R9)
+
+ ADDQ(RDI, R9)
+
+ VPSRLQ(Imm(19), Y4, Y3)
+ VPSLLQ(Imm(64-19), Y4, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y4, Y3)
+ VPSLLQ(Imm(64-61), Y4, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y0, Y2)
+
+ VPBLENDD(Imm(0xF0), Y2, Y4, Y4) // Y4 is now fully recomputed for the next pass
+
+ MOVQ(R9, RDI) // round 4 of 16: a=R9 b=R10 c=R11 d=RAX e=RBX f=RCX g=R8 h=RDX
+ RORXQ(Imm(41), RBX, R13)
+ RORXQ(Imm(18), RBX, R14)
+ ADDQ(Mem{Base: SP}.Offset(3*8+frame_YFER), RDX)
+ ORQ(R11, RDI)
+
+ MOVQ(RCX, R15)
+ RORXQ(Imm(34), R9, R12)
+ XORQ(R14, R13)
+ XORQ(R8, R15)
+
+ RORXQ(Imm(14), RBX, R14)
+ ANDQ(RBX, R15)
+ ADDQ(RDX, RAX)
+ ANDQ(R10, RDI)
+
+ XORQ(R14, R13)
+ XORQ(R8, R15)
+
+ RORXQ(Imm(39), R9, R14)
+ ADDQ(R13, R15)
+
+ XORQ(R12, R14)
+ ADDQ(R15, RAX)
+
+ RORXQ(Imm(28), R9, R12)
+
+ XORQ(R12, R14)
+ MOVQ(R9, R12)
+ ANDQ(R11, R12)
+ ORQ(R12, RDI)
+
+ ADDQ(R14, RDX)
+ ADDQ(R15, RDX)
+ ADDQ(RDI, RDX)
+
+ VPADDQ(Mem{Base: BP}.Offset(1*32), Y5, Y0) // group 2: same pattern as group 1, with Y5 as the vector being consumed and recomputed
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER))
+
+ MY_VPALIGNR(Y0, Y4, Y7, 8)
+
+ VPADDQ(Y5, Y0, Y0)
+
+ MY_VPALIGNR(Y1, Y6, Y5, 8)
+
+ VPSRLQ(Imm(1), Y1, Y2)
+ VPSLLQ(Imm(64-1), Y1, Y3)
+ VPOR(Y2, Y3, Y3)
+
+ VPSRLQ(Imm(7), Y1, Y8)
+
+ MOVQ(RDX, RDI) // round 5 of 16: a=RDX b=R9 c=R10 d=R11 e=RAX f=RBX g=RCX h=R8
+ RORXQ(Imm(41), RAX, R13)
+ RORXQ(Imm(18), RAX, R14)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R8)
+ ORQ(R10, RDI)
+ MOVQ(RBX, R15)
+ RORXQ(Imm(34), RDX, R12)
+
+ XORQ(R14, R13)
+ XORQ(RCX, R15)
+ RORXQ(Imm(14), RAX, R14)
+
+ ANDQ(RAX, R15)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), RDX, R14)
+ ADDQ(R8, R11)
+
+ ANDQ(R9, RDI)
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RDX, R12)
+
+ XORQ(RCX, R15)
+ XORQ(R12, R14)
+ MOVQ(RDX, R12)
+ ANDQ(R10, R12)
+
+ ADDQ(R13, R15)
+ ORQ(R12, RDI)
+ ADDQ(R14, R8)
+
+ ADDQ(R15, R11)
+
+ ADDQ(R15, R8)
+ ADDQ(RDI, R8)
+
+ VPSRLQ(Imm(8), Y1, Y2)
+ VPSLLQ(Imm(64-8), Y1, Y1)
+ VPOR(Y2, Y1, Y1)
+
+ VPXOR(Y8, Y3, Y3)
+ VPXOR(Y1, Y3, Y1)
+
+ VPADDQ(Y1, Y0, Y0)
+
+ VPERM2F128(Imm(0x0), Y0, Y0, Y5)
+
+ VPAND(MASK_YMM_LO, Y0, Y0)
+
+ VPERM2F128(Imm(0x11), Y4, Y4, Y2)
+ VPSRLQ(Imm(6), Y2, Y8)
+
+ MOVQ(R8, RDI) // round 6 of 16: a=R8 b=RDX c=R9 d=R10 e=R11 f=RAX g=RBX h=RCX
+ RORXQ(Imm(41), R11, R13)
+ RORXQ(Imm(18), R11, R14)
+ ADDQ(Mem{Base: SP}.Offset(1*8+frame_YFER), RCX)
+ ORQ(R9, RDI)
+
+ MOVQ(RAX, R15)
+ RORXQ(Imm(34), R8, R12)
+ XORQ(R14, R13)
+ XORQ(RBX, R15)
+
+ RORXQ(Imm(14), R11, R14)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R8, R14)
+ ANDQ(R11, R15)
+ ADDQ(RCX, R10)
+
+ ANDQ(RDX, RDI)
+ XORQ(R12, R14)
+
+ RORXQ(Imm(28), R8, R12)
+ XORQ(RBX, R15)
+
+ XORQ(R12, R14)
+ MOVQ(R8, R12)
+ ANDQ(R9, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, RCX)
+
+ ADDQ(R15, R10)
+ ADDQ(R15, RCX)
+ ADDQ(RDI, RCX)
+
+ VPSRLQ(Imm(19), Y2, Y3)
+ VPSLLQ(Imm(64-19), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y2, Y3)
+ VPSLLQ(Imm(64-61), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y5, Y5)
+
+ VPSRLQ(Imm(6), Y5, Y8)
+
+ MOVQ(RCX, RDI) // round 7 of 16: a=RCX b=R8 c=RDX d=R9 e=R10 f=R11 g=RAX h=RBX
+ RORXQ(Imm(41), R10, R13)
+ ADDQ(Mem{Base: SP}.Offset(2*8+frame_YFER), RBX)
+
+ RORXQ(Imm(18), R10, R14)
+ ORQ(RDX, RDI)
+ MOVQ(R11, R15)
+ XORQ(RAX, R15)
+
+ RORXQ(Imm(34), RCX, R12)
+ XORQ(R14, R13)
+ ANDQ(R10, R15)
+
+ RORXQ(Imm(14), R10, R14)
+ ADDQ(RBX, R9)
+ ANDQ(R8, RDI)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(39), RCX, R14)
+ XORQ(RAX, R15)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RCX, R12)
+
+ XORQ(R12, R14)
+ MOVQ(RCX, R12)
+ ANDQ(RDX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, RBX)
+ ADDQ(R15, R9)
+ ADDQ(R15, RBX)
+
+ ADDQ(RDI, RBX)
+
+ VPSRLQ(Imm(19), Y5, Y3)
+ VPSLLQ(Imm(64-19), Y5, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y5, Y3)
+ VPSLLQ(Imm(64-61), Y5, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y0, Y2)
+
+ VPBLENDD(Imm(0xF0), Y2, Y5, Y5)
+
+ MOVQ(RBX, RDI) // round 8 of 16: a=RBX b=RCX c=R8 d=RDX e=R9 f=R10 g=R11 h=RAX
+ RORXQ(Imm(41), R9, R13)
+ RORXQ(Imm(18), R9, R14)
+ ADDQ(Mem{Base: SP}.Offset(3*8+frame_YFER), RAX)
+ ORQ(R8, RDI)
+
+ MOVQ(R10, R15)
+ RORXQ(Imm(34), RBX, R12)
+ XORQ(R14, R13)
+ XORQ(R11, R15)
+
+ RORXQ(Imm(14), R9, R14)
+ ANDQ(R9, R15)
+ ADDQ(RAX, RDX)
+ ANDQ(RCX, RDI)
+
+ XORQ(R14, R13)
+ XORQ(R11, R15)
+
+ RORXQ(Imm(39), RBX, R14)
+ ADDQ(R13, R15)
+
+ XORQ(R12, R14)
+ ADDQ(R15, RDX)
+
+ RORXQ(Imm(28), RBX, R12)
+
+ XORQ(R12, R14)
+ MOVQ(RBX, R12)
+ ANDQ(R8, R12)
+ ORQ(R12, RDI)
+
+ ADDQ(R14, RAX)
+ ADDQ(R15, RAX)
+ ADDQ(RDI, RAX)
+
+ VPADDQ(Mem{Base: BP}.Offset(2*32), Y6, Y0) // group 3: same pattern, with Y6 as the vector being consumed and recomputed
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER))
+
+ MY_VPALIGNR(Y0, Y5, Y4, 8)
+
+ VPADDQ(Y6, Y0, Y0)
+
+ MY_VPALIGNR(Y1, Y7, Y6, 8)
+
+ VPSRLQ(Imm(1), Y1, Y2)
+ VPSLLQ(Imm(64-1), Y1, Y3)
+ VPOR(Y2, Y3, Y3)
+
+ VPSRLQ(Imm(7), Y1, Y8)
+
+ MOVQ(RAX, RDI) // round 9 of 16: register roles are back to a=RAX b=RBX c=RCX d=R8 e=RDX f=R9 g=R10 h=R11
+ RORXQ(Imm(41), RDX, R13)
+ RORXQ(Imm(18), RDX, R14)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R11)
+ ORQ(RCX, RDI)
+ MOVQ(R9, R15)
+ RORXQ(Imm(34), RAX, R12)
+
+ XORQ(R14, R13)
+ XORQ(R10, R15)
+ RORXQ(Imm(14), RDX, R14)
+
+ ANDQ(RDX, R15)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), RAX, R14)
+ ADDQ(R11, R8)
+
+ ANDQ(RBX, RDI)
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RAX, R12)
+
+ XORQ(R10, R15)
+ XORQ(R12, R14)
+ MOVQ(RAX, R12)
+ ANDQ(RCX, R12)
+
+ ADDQ(R13, R15)
+ ORQ(R12, RDI)
+ ADDQ(R14, R11)
+
+ ADDQ(R15, R8)
+
+ ADDQ(R15, R11)
+ ADDQ(RDI, R11)
+
+ VPSRLQ(Imm(8), Y1, Y2)
+ VPSLLQ(Imm(64-8), Y1, Y1)
+ VPOR(Y2, Y1, Y1)
+
+ VPXOR(Y8, Y3, Y3)
+ VPXOR(Y1, Y3, Y1)
+
+ VPADDQ(Y1, Y0, Y0)
+
+ VPERM2F128(Imm(0x0), Y0, Y0, Y6)
+
+ VPAND(MASK_YMM_LO, Y0, Y0)
+
+ VPERM2F128(Imm(0x11), Y5, Y5, Y2)
+ VPSRLQ(Imm(6), Y2, Y8)
+
+ MOVQ(R11, RDI) // round 10 of 16: a=R11 b=RAX c=RBX d=RCX e=R8 f=RDX g=R9 h=R10
+ RORXQ(Imm(41), R8, R13)
+ RORXQ(Imm(18), R8, R14)
+ ADDQ(Mem{Base: SP}.Offset(1*8+frame_YFER), R10)
+ ORQ(RBX, RDI)
+
+ MOVQ(RDX, R15)
+ RORXQ(Imm(34), R11, R12)
+ XORQ(R14, R13)
+ XORQ(R9, R15)
+
+ RORXQ(Imm(14), R8, R14)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R11, R14)
+ ANDQ(R8, R15)
+ ADDQ(R10, RCX)
+
+ ANDQ(RAX, RDI)
+ XORQ(R12, R14)
+
+ RORXQ(Imm(28), R11, R12)
+ XORQ(R9, R15)
+
+ XORQ(R12, R14)
+ MOVQ(R11, R12)
+ ANDQ(RBX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, R10)
+
+ ADDQ(R15, RCX)
+ ADDQ(R15, R10)
+ ADDQ(RDI, R10)
+
+ VPSRLQ(Imm(19), Y2, Y3)
+ VPSLLQ(Imm(64-19), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y2, Y3)
+ VPSLLQ(Imm(64-61), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y6, Y6)
+
+ VPSRLQ(Imm(6), Y6, Y8)
+
+ MOVQ(R10, RDI) // round 11 of 16: a=R10 b=R11 c=RAX d=RBX e=RCX f=R8 g=RDX h=R9
+ RORXQ(Imm(41), RCX, R13)
+ ADDQ(Mem{Base: SP}.Offset(2*8+frame_YFER), R9)
+
+ RORXQ(Imm(18), RCX, R14)
+ ORQ(RAX, RDI)
+ MOVQ(R8, R15)
+ XORQ(RDX, R15)
+
+ RORXQ(Imm(34), R10, R12)
+ XORQ(R14, R13)
+ ANDQ(RCX, R15)
+
+ RORXQ(Imm(14), RCX, R14)
+ ADDQ(R9, RBX)
+ ANDQ(R11, RDI)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R10, R14)
+ XORQ(RDX, R15)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R10, R12)
+
+ XORQ(R12, R14)
+ MOVQ(R10, R12)
+ ANDQ(RAX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, R9)
+ ADDQ(R15, RBX)
+ ADDQ(R15, R9)
+
+ ADDQ(RDI, R9)
+
+ VPSRLQ(Imm(19), Y6, Y3)
+ VPSLLQ(Imm(64-19), Y6, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y6, Y3)
+ VPSLLQ(Imm(64-61), Y6, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y0, Y2)
+
+ VPBLENDD(Imm(0xF0), Y2, Y6, Y6)
+
+ MOVQ(R9, RDI) // round 12 of 16: a=R9 b=R10 c=R11 d=RAX e=RBX f=RCX g=R8 h=RDX
+ RORXQ(Imm(41), RBX, R13)
+ RORXQ(Imm(18), RBX, R14)
+ ADDQ(Mem{Base: SP}.Offset(3*8+frame_YFER), RDX)
+ ORQ(R11, RDI)
+
+ MOVQ(RCX, R15)
+ RORXQ(Imm(34), R9, R12)
+ XORQ(R14, R13)
+ XORQ(R8, R15)
+
+ RORXQ(Imm(14), RBX, R14)
+ ANDQ(RBX, R15)
+ ADDQ(RDX, RAX)
+ ANDQ(R10, RDI)
+
+ XORQ(R14, R13)
+ XORQ(R8, R15)
+
+ RORXQ(Imm(39), R9, R14)
+ ADDQ(R13, R15)
+
+ XORQ(R12, R14)
+ ADDQ(R15, RAX)
+
+ RORXQ(Imm(28), R9, R12)
+
+ XORQ(R12, R14)
+ MOVQ(R9, R12)
+ ANDQ(R11, R12)
+ ORQ(R12, RDI)
+
+ ADDQ(R14, RDX)
+ ADDQ(R15, RDX)
+ ADDQ(RDI, RDX)
+
+ VPADDQ(Mem{Base: BP}.Offset(3*32), Y7, Y0) // group 4: same pattern, with Y7 as the vector being consumed and recomputed
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER))
+ ADDQ(U8(4*32), RBP) // advance the round-constant pointer by the 128 bytes this iteration used
+
+ MY_VPALIGNR(Y0, Y6, Y5, 8)
+
+ VPADDQ(Y7, Y0, Y0)
+
+ MY_VPALIGNR(Y1, Y4, Y7, 8)
+
+ VPSRLQ(Imm(1), Y1, Y2)
+ VPSLLQ(Imm(64-1), Y1, Y3)
+ VPOR(Y2, Y3, Y3)
+
+ VPSRLQ(Imm(7), Y1, Y8)
+
+ MOVQ(RDX, RDI) // round 13 of 16: a=RDX b=R9 c=R10 d=R11 e=RAX f=RBX g=RCX h=R8
+ RORXQ(Imm(41), RAX, R13)
+ RORXQ(Imm(18), RAX, R14)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R8)
+ ORQ(R10, RDI)
+ MOVQ(RBX, R15)
+ RORXQ(Imm(34), RDX, R12)
+
+ XORQ(R14, R13)
+ XORQ(RCX, R15)
+ RORXQ(Imm(14), RAX, R14)
+
+ ANDQ(RAX, R15)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), RDX, R14)
+ ADDQ(R8, R11)
+
+ ANDQ(R9, RDI)
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RDX, R12)
+
+ XORQ(RCX, R15)
+ XORQ(R12, R14)
+ MOVQ(RDX, R12)
+ ANDQ(R10, R12)
+
+ ADDQ(R13, R15)
+ ORQ(R12, RDI)
+ ADDQ(R14, R8)
+
+ ADDQ(R15, R11)
+
+ ADDQ(R15, R8)
+ ADDQ(RDI, R8)
+
+ VPSRLQ(Imm(8), Y1, Y2)
+ VPSLLQ(Imm(64-8), Y1, Y1)
+ VPOR(Y2, Y1, Y1)
+
+ VPXOR(Y8, Y3, Y3)
+ VPXOR(Y1, Y3, Y1)
+
+ VPADDQ(Y1, Y0, Y0)
+
+ VPERM2F128(Imm(0x0), Y0, Y0, Y7)
+
+ VPAND(MASK_YMM_LO, Y0, Y0)
+
+ VPERM2F128(Imm(0x11), Y6, Y6, Y2)
+ VPSRLQ(Imm(6), Y2, Y8)
+
+ MOVQ(R8, RDI) // round 14 of 16: a=R8 b=RDX c=R9 d=R10 e=R11 f=RAX g=RBX h=RCX
+ RORXQ(Imm(41), R11, R13)
+ RORXQ(Imm(18), R11, R14)
+ ADDQ(Mem{Base: SP}.Offset(1*8+frame_YFER), RCX)
+ ORQ(R9, RDI)
+
+ MOVQ(RAX, R15)
+ RORXQ(Imm(34), R8, R12)
+ XORQ(R14, R13)
+ XORQ(RBX, R15)
+
+ RORXQ(Imm(14), R11, R14)
+ XORQ(R14, R13)
+ RORXQ(Imm(39), R8, R14)
+ ANDQ(R11, R15)
+ ADDQ(RCX, R10)
+
+ ANDQ(RDX, RDI)
+ XORQ(R12, R14)
+
+ RORXQ(Imm(28), R8, R12)
+ XORQ(RBX, R15)
+
+ XORQ(R12, R14)
+ MOVQ(R8, R12)
+ ANDQ(R9, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, RCX)
+
+ ADDQ(R15, R10)
+ ADDQ(R15, RCX)
+ ADDQ(RDI, RCX)
+
+ VPSRLQ(Imm(19), Y2, Y3)
+ VPSLLQ(Imm(64-19), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y2, Y3)
+ VPSLLQ(Imm(64-61), Y2, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y7, Y7)
+
+ VPSRLQ(Imm(6), Y7, Y8)
+
+ MOVQ(RCX, RDI) // round 15 of 16: a=RCX b=R8 c=RDX d=R9 e=R10 f=R11 g=RAX h=RBX
+ RORXQ(Imm(41), R10, R13)
+ ADDQ(Mem{Base: SP}.Offset(2*8+frame_YFER), RBX)
+
+ RORXQ(Imm(18), R10, R14)
+ ORQ(RDX, RDI)
+ MOVQ(R11, R15)
+ XORQ(RAX, R15)
+
+ RORXQ(Imm(34), RCX, R12)
+ XORQ(R14, R13)
+ ANDQ(R10, R15)
+
+ RORXQ(Imm(14), R10, R14)
+ ADDQ(RBX, R9)
+ ANDQ(R8, RDI)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(39), RCX, R14)
+ XORQ(RAX, R15)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RCX, R12)
+
+ XORQ(R12, R14)
+ MOVQ(RCX, R12)
+ ANDQ(RDX, R12)
+ ADDQ(R13, R15)
+
+ ORQ(R12, RDI)
+ ADDQ(R14, RBX)
+ ADDQ(R15, R9)
+ ADDQ(R15, RBX)
+
+ ADDQ(RDI, RBX)
+
+ VPSRLQ(Imm(19), Y7, Y3)
+ VPSLLQ(Imm(64-19), Y7, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+ VPSRLQ(Imm(61), Y7, Y3)
+ VPSLLQ(Imm(64-61), Y7, Y1)
+ VPOR(Y1, Y3, Y3)
+ VPXOR(Y3, Y8, Y8)
+
+ VPADDQ(Y8, Y0, Y2)
+
+ VPBLENDD(Imm(0xF0), Y2, Y7, Y7)
+
+ MOVQ(RBX, RDI) // round 16 of 16: a=RBX b=RCX c=R8 d=RDX e=R9 f=R10 g=R11 h=RAX
+ RORXQ(Imm(41), R9, R13)
+ RORXQ(Imm(18), R9, R14)
+ ADDQ(Mem{Base: SP}.Offset(3*8+frame_YFER), RAX)
+ ORQ(R8, RDI)
+
+ MOVQ(R10, R15)
+ RORXQ(Imm(34), RBX, R12)
+ XORQ(R14, R13)
+ XORQ(R11, R15)
+
+ RORXQ(Imm(14), R9, R14)
+ ANDQ(R9, R15)
+ ADDQ(RAX, RDX)
+ ANDQ(RCX, RDI)
+
+ XORQ(R14, R13)
+ XORQ(R11, R15)
+
+ RORXQ(Imm(39), RBX, R14)
+ ADDQ(R13, R15)
+
+ XORQ(R12, R14)
+ ADDQ(R15, RDX)
+
+ RORXQ(Imm(28), RBX, R12)
+
+ XORQ(R12, R14)
+ MOVQ(RBX, R12)
+ ANDQ(R8, R12)
+ ORQ(R12, RDI)
+
+ ADDQ(R14, RAX)
+ ADDQ(R15, RAX)
+ ADDQ(RDI, RAX)
+
+ SUBQ(Imm(1), Mem{Base: SP}.Offset(frame_SRND)) // decrement the iteration counter set in loop0
+ JNE(LabelRef("loop1"))
+
+ MOVQ(U32(2), Mem{Base: SP}.Offset(frame_SRND)) // counter for loop2: two iterations
+}
+
+// loop2 emits the "loop2" body of blockAVX2: per iteration, eight scalar rounds fed from Y4 and Y5 with no further message scheduling; after frame_SRND iterations it adds the working registers into the digest at SI and loops back to loop0 until the input pointer reaches frame_INPEND (originally annotated "Line 1164").
+func loop2() {
+ Label("loop2")
+ VPADDQ(Mem{Base: BP}, Y4, Y0) // Y0 = Y4 + the 32 bytes at BP (schedule words plus round constants)
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER)) // spill so each scalar round can add one qword from the stack
+
+ MOVQ(R9, R15) // round 1 of 8: a=RAX b=RBX c=RCX d=R8 e=RDX f=R9 g=R10 h=R11
+ RORXQ(Imm(41), RDX, R13)
+ RORXQ(Imm(18), RDX, R14)
+ XORQ(R10, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), RDX, R14)
+ ANDQ(RDX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), RAX, R12)
+ XORQ(R10, R15)
+ RORXQ(Imm(39), RAX, R14)
+ MOVQ(RAX, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RAX, R12)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R11)
+ ORQ(RCX, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(RAX, R12)
+ ANDQ(RBX, RDI)
+ ANDQ(RCX, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(R11, R8)
+ ORQ(R12, RDI)
+ ADDQ(R14, R11)
+
+ ADDQ(R15, R8)
+
+ ADDQ(R15, R11)
+ MOVQ(RDX, R15) // round 2 of 8: a=R11 b=RAX c=RBX d=RCX e=R8 f=RDX g=R9 h=R10
+ RORXQ(Imm(41), R8, R13)
+ RORXQ(Imm(18), R8, R14)
+ XORQ(R9, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), R8, R14)
+ ANDQ(R8, R15)
+ ADDQ(RDI, R11) // the previous round's final addition into its h register, deferred to here
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), R11, R12)
+ XORQ(R9, R15)
+ RORXQ(Imm(39), R11, R14)
+ MOVQ(R11, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R11, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*1+frame_YFER), R10)
+ ORQ(RBX, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(R11, R12)
+ ANDQ(RAX, RDI)
+ ANDQ(RBX, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(R10, RCX)
+ ORQ(R12, RDI)
+ ADDQ(R14, R10)
+
+ ADDQ(R15, RCX)
+
+ ADDQ(R15, R10)
+ MOVQ(R8, R15) // round 3 of 8: a=R10 b=R11 c=RAX d=RBX e=RCX f=R8 g=RDX h=R9
+ RORXQ(Imm(41), RCX, R13)
+ RORXQ(Imm(18), RCX, R14)
+ XORQ(RDX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), RCX, R14)
+ ANDQ(RCX, R15)
+ ADDQ(RDI, R10)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), R10, R12)
+ XORQ(RDX, R15)
+ RORXQ(Imm(39), R10, R14)
+ MOVQ(R10, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R10, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*2+frame_YFER), R9)
+ ORQ(RAX, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(R10, R12)
+ ANDQ(R11, RDI)
+ ANDQ(RAX, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(R9, RBX)
+ ORQ(R12, RDI)
+ ADDQ(R14, R9)
+
+ ADDQ(R15, RBX)
+
+ ADDQ(R15, R9)
+ MOVQ(RCX, R15) // round 4 of 8: a=R9 b=R10 c=R11 d=RAX e=RBX f=RCX g=R8 h=RDX
+ RORXQ(Imm(41), RBX, R13)
+ RORXQ(Imm(18), RBX, R14)
+ XORQ(R8, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), RBX, R14)
+ ANDQ(RBX, R15)
+ ADDQ(RDI, R9)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), R9, R12)
+ XORQ(R8, R15)
+ RORXQ(Imm(39), R9, R14)
+ MOVQ(R9, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R9, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*3+frame_YFER), RDX)
+ ORQ(R11, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(R9, R12)
+ ANDQ(R10, RDI)
+ ANDQ(R11, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(RDX, RAX)
+ ORQ(R12, RDI)
+ ADDQ(R14, RDX)
+
+ ADDQ(R15, RAX)
+
+ ADDQ(R15, RDX)
+
+ ADDQ(RDI, RDX)
+
+ VPADDQ(Mem{Base: BP}.Offset(1*32), Y5, Y0) // second half: Y0 = Y5 + the next 32 bytes at BP
+ VMOVDQU(Y0, Mem{Base: SP}.Offset(frame_YFER))
+ ADDQ(U8(2*32), RBP) // advance the round-constant pointer by the 64 bytes this iteration used
+
+ MOVQ(RBX, R15) // round 5 of 8: a=RDX b=R9 c=R10 d=R11 e=RAX f=RBX g=RCX h=R8
+ RORXQ(Imm(41), RAX, R13)
+ RORXQ(Imm(18), RAX, R14)
+ XORQ(RCX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), RAX, R14)
+ ANDQ(RAX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), RDX, R12)
+ XORQ(RCX, R15)
+ RORXQ(Imm(39), RDX, R14)
+ MOVQ(RDX, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RDX, R12)
+ ADDQ(Mem{Base: SP}.Offset(frame_YFER), R8)
+ ORQ(R10, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(RDX, R12)
+ ANDQ(R9, RDI)
+ ANDQ(R10, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(R8, R11)
+ ORQ(R12, RDI)
+ ADDQ(R14, R8)
+
+ ADDQ(R15, R11)
+
+ ADDQ(R15, R8)
+ MOVQ(RAX, R15) // round 6 of 8: a=R8 b=RDX c=R9 d=R10 e=R11 f=RAX g=RBX h=RCX
+ RORXQ(Imm(41), R11, R13)
+ RORXQ(Imm(18), R11, R14)
+ XORQ(RBX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), R11, R14)
+ ANDQ(R11, R15)
+ ADDQ(RDI, R8)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), R8, R12)
+ XORQ(RBX, R15)
+ RORXQ(Imm(39), R8, R14)
+ MOVQ(R8, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), R8, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*1+frame_YFER), RCX)
+ ORQ(R9, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(R8, R12)
+ ANDQ(RDX, RDI)
+ ANDQ(R9, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(RCX, R10)
+ ORQ(R12, RDI)
+ ADDQ(R14, RCX)
+
+ ADDQ(R15, R10)
+
+ ADDQ(R15, RCX)
+ MOVQ(R11, R15) // round 7 of 8: a=RCX b=R8 c=RDX d=R9 e=R10 f=R11 g=RAX h=RBX
+ RORXQ(Imm(41), R10, R13)
+ RORXQ(Imm(18), R10, R14)
+ XORQ(RAX, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), R10, R14)
+ ANDQ(R10, R15)
+ ADDQ(RDI, RCX)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), RCX, R12)
+ XORQ(RAX, R15)
+ RORXQ(Imm(39), RCX, R14)
+ MOVQ(RCX, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RCX, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*2+frame_YFER), RBX)
+ ORQ(RDX, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(RCX, R12)
+ ANDQ(R8, RDI)
+ ANDQ(RDX, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(RBX, R9)
+ ORQ(R12, RDI)
+ ADDQ(R14, RBX)
+
+ ADDQ(R15, R9)
+
+ ADDQ(R15, RBX)
+ MOVQ(R10, R15) // round 8 of 8: a=RBX b=RCX c=R8 d=RDX e=R9 f=R10 g=R11 h=RAX
+ RORXQ(Imm(41), R9, R13)
+ RORXQ(Imm(18), R9, R14)
+ XORQ(R11, R15)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(14), R9, R14)
+ ANDQ(R9, R15)
+ ADDQ(RDI, RBX)
+
+ XORQ(R14, R13)
+ RORXQ(Imm(34), RBX, R12)
+ XORQ(R11, R15)
+ RORXQ(Imm(39), RBX, R14)
+ MOVQ(RBX, RDI)
+
+ XORQ(R12, R14)
+ RORXQ(Imm(28), RBX, R12)
+ ADDQ(Mem{Base: SP}.Offset(8*3+frame_YFER), RAX)
+ ORQ(R8, RDI)
+
+ XORQ(R12, R14)
+ MOVQ(RBX, R12)
+ ANDQ(RCX, RDI)
+ ANDQ(R8, R12)
+ ADDQ(R13, R15)
+
+ ADDQ(RAX, RDX)
+ ORQ(R12, RDI)
+ ADDQ(R14, RAX)
+
+ ADDQ(R15, RDX)
+
+ ADDQ(R15, RAX)
+
+ ADDQ(RDI, RAX)
+
+ VMOVDQU(Y6, Y4) // move the remaining schedule vectors down so the next iteration consumes Y6 and Y7
+ VMOVDQU(Y7, Y5)
+
+ SUBQ(Imm(1), Mem{Base: SP}.Offset(frame_SRND)) // counter was set to 2 at the end of loop1
+ JNE(LabelRef("loop2"))
+
+ addm(Mem{Base: SI}.Offset(8*0), RAX) // fold the working registers into the eight digest words at SI
+ addm(Mem{Base: SI}.Offset(8*1), RBX)
+ addm(Mem{Base: SI}.Offset(8*2), RCX)
+ addm(Mem{Base: SI}.Offset(8*3), R8)
+ addm(Mem{Base: SI}.Offset(8*4), RDX)
+ addm(Mem{Base: SI}.Offset(8*5), R9)
+ addm(Mem{Base: SI}.Offset(8*6), R10)
+ addm(Mem{Base: SI}.Offset(8*7), R11)
+
+ MOVQ(Mem{Base: SP}.Offset(frame_INP), RDI) // reload the input pointer saved in loop0
+ ADDQ(Imm(128), RDI) // advance to the next 128-byte block
+ CMPQ(RDI, Mem{Base: SP}.Offset(frame_INPEND))
+ JNE(LabelRef("loop0")) // more input: process the next block; otherwise continue into the code emitted after this
+}
+
+// done_hash emits the "done_hash" label, the target of the JZ in blockAVX2, followed by VZEROUPPER and RET (originally annotated "Line 1468").
+func done_hash() {
+ Label("done_hash")
+ VZEROUPPER()
+ RET()
+}
+
+// ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~DATA SECTION~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
+
+// Pointers for memoizing Data section symbols: nil until the matching *_DATA function has declared its symbol, then they hold the Mem it returned.
+var PSHUFFLE_BYTE_FLIP_MASK_DATA_ptr, MASK_YMM_LO_ptr *Mem
+
+// PSHUFFLE_BYTE_FLIP_MASK_DATA returns the memory operand of the read-only
+// PSHUFFLE_BYTE_FLIP_MASK symbol. The first call declares the symbol and its
+// four quadwords; later calls return the operand remembered in
+// PSHUFFLE_BYTE_FLIP_MASK_DATA_ptr without declaring anything again.
+//
+// Line 310
+func PSHUFFLE_BYTE_FLIP_MASK_DATA() Mem {
+	if cached := PSHUFFLE_BYTE_FLIP_MASK_DATA_ptr; cached != nil {
+		return *cached
+	}
+
+	mask := GLOBL("PSHUFFLE_BYTE_FLIP_MASK", NOPTR|RODATA)
+	PSHUFFLE_BYTE_FLIP_MASK_DATA_ptr = &mask
+
+	quads := [...]uint64{
+		0x0001020304050607,
+		0x08090a0b0c0d0e0f,
+		0x1011121314151617,
+		0x18191a1b1c1d1e1f,
+	}
+	for i, q := range quads {
+		DATA(i*8, U64(q))
+	}
+	return mask
+}
+
+// MASK_YMM_LO_DATA returns the memory operand of the read-only MASK_YMM_LO
+// symbol: 16 zero bytes followed by 16 bytes of all ones. The first call
+// declares the symbol; later calls return the operand remembered in
+// MASK_YMM_LO_ptr without declaring anything again.
+//
+// Line 317
+func MASK_YMM_LO_DATA() Mem {
+	if cached := MASK_YMM_LO_ptr; cached != nil {
+		return *cached
+	}
+
+	mask := GLOBL("MASK_YMM_LO", NOPTR|RODATA)
+	MASK_YMM_LO_ptr = &mask
+
+	quads := [...]uint64{
+		0x0000000000000000,
+		0x0000000000000000,
+		0xFFFFFFFFFFFFFFFF,
+		0xFFFFFFFFFFFFFFFF,
+	}
+	for i, q := range quads {
+		DATA(i*8, U64(q))
+	}
+	return mask
+}
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run sha512block_amd64_asm.go -out ../sha512block_amd64.s -pkg sha512. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
-// SHA512 block routine. See sha512block.go for Go equivalent.
-//
-// The algorithm is detailed in FIPS 180-4:
-//
-// https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
-//
-// Wt = Mt; for 0 <= t <= 15
-// Wt = SIGMA1(Wt-2) + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
-//
-// a = H0
-// b = H1
-// c = H2
-// d = H3
-// e = H4
-// f = H5
-// g = H6
-// h = H7
-//
-// for t = 0 to 79 {
-// T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
-// T2 = BIGSIGMA0(a) + Maj(a,b,c)
-// h = g
-// g = f
-// f = e
-// e = d + T1
-// d = c
-// c = b
-// b = a
-// a = T1 + T2
-// }
-//
-// H0 = a + H0
-// H1 = b + H1
-// H2 = c + H2
-// H3 = d + H3
-// H4 = e + H4
-// H5 = f + H5
-// H6 = g + H6
-// H7 = h + H7
-
-// Wt = Mt; for 0 <= t <= 15
-#define MSGSCHEDULE0(index) \
- MOVQ (index*8)(SI), AX; \
- BSWAPQ AX; \
- MOVQ AX, (index*8)(BP)
-
-// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
-// SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
-// SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
-#define MSGSCHEDULE1(index) \
- MOVQ ((index-2)*8)(BP), AX; \
- MOVQ AX, CX; \
- RORQ $19, AX; \
- MOVQ CX, DX; \
- RORQ $61, CX; \
- SHRQ $6, DX; \
- MOVQ ((index-15)*8)(BP), BX; \
- XORQ CX, AX; \
- MOVQ BX, CX; \
- XORQ DX, AX; \
- RORQ $1, BX; \
- MOVQ CX, DX; \
- SHRQ $7, DX; \
- RORQ $8, CX; \
- ADDQ ((index-7)*8)(BP), AX; \
- XORQ CX, BX; \
- XORQ DX, BX; \
- ADDQ ((index-16)*8)(BP), BX; \
- ADDQ BX, AX; \
- MOVQ AX, ((index)*8)(BP)
-
-// Calculate T1 in AX - uses AX, CX and DX registers.
-// h is also used as an accumulator. Wt is passed in AX.
-// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
-// BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
-// Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
-#define SHA512T1(const, e, f, g, h) \
- MOVQ $const, DX; \
- ADDQ AX, h; \
- MOVQ e, AX; \
- ADDQ DX, h; \
- MOVQ e, CX; \
- RORQ $14, AX; \
- MOVQ e, DX; \
- RORQ $18, CX; \
- XORQ CX, AX; \
- MOVQ e, CX; \
- RORQ $41, DX; \
- ANDQ f, CX; \
- XORQ AX, DX; \
- MOVQ e, AX; \
- NOTQ AX; \
- ADDQ DX, h; \
- ANDQ g, AX; \
- XORQ CX, AX; \
- ADDQ h, AX
-
-// Calculate T2 in BX - uses BX, CX, DX and DI registers.
-// T2 = BIGSIGMA0(a) + Maj(a, b, c)
-// BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
-// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
-#define SHA512T2(a, b, c) \
- MOVQ a, DI; \
- MOVQ c, BX; \
- RORQ $28, DI; \
- MOVQ a, DX; \
- ANDQ b, BX; \
- RORQ $34, DX; \
- MOVQ a, CX; \
- ANDQ c, CX; \
- XORQ DX, DI; \
- XORQ CX, BX; \
- MOVQ a, DX; \
- MOVQ b, CX; \
- RORQ $39, DX; \
- ANDQ a, CX; \
- XORQ CX, BX; \
- XORQ DX, DI; \
- ADDQ DI, BX
-
-// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
-// The values for e and a are stored in d and h, ready for rotation.
-#define SHA512ROUND(index, const, a, b, c, d, e, f, g, h) \
- SHA512T1(const, e, f, g, h); \
- SHA512T2(a, b, c); \
- MOVQ BX, h; \
- ADDQ AX, d; \
- ADDQ AX, h
-
-#define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
- MSGSCHEDULE0(index); \
- SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
-
-#define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
- MSGSCHEDULE1(index); \
- SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
-
-TEXT ·blockAMD64(SB),0,$648-32
- MOVQ p_base+8(FP), SI
- MOVQ p_len+16(FP), DX
- SHRQ $7, DX
- SHLQ $7, DX
-
- LEAQ (SI)(DX*1), DI
- MOVQ DI, 640(SP)
- CMPQ SI, DI
- JEQ end
-
- MOVQ dig+0(FP), BP
- MOVQ (0*8)(BP), R8 // a = H0
- MOVQ (1*8)(BP), R9 // b = H1
- MOVQ (2*8)(BP), R10 // c = H2
- MOVQ (3*8)(BP), R11 // d = H3
- MOVQ (4*8)(BP), R12 // e = H4
- MOVQ (5*8)(BP), R13 // f = H5
- MOVQ (6*8)(BP), R14 // g = H6
- MOVQ (7*8)(BP), R15 // h = H7
+// func blockAMD64(dig *digest, p []byte)
+TEXT ·blockAMD64(SB), $648-32
+ MOVQ p_base+8(FP), SI
+ MOVQ p_len+16(FP), DX
+ SHRQ $0x07, DX
+ SHLQ $0x07, DX
+ LEAQ (SI)(DX*1), DI
+ MOVQ DI, 640(SP)
+ CMPQ SI, DI
+ JEQ end
+ MOVQ dig+0(FP), BP
+ MOVQ (BP), R8
+ MOVQ 8(BP), R9
+ MOVQ 16(BP), R10
+ MOVQ 24(BP), R11
+ MOVQ 32(BP), R12
+ MOVQ 40(BP), R13
+ MOVQ 48(BP), R14
+ MOVQ 56(BP), R15
loop:
- MOVQ SP, BP // message schedule
-
- SHA512ROUND0(0, 0x428a2f98d728ae22, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND0(1, 0x7137449123ef65cd, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND0(2, 0xb5c0fbcfec4d3b2f, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND0(3, 0xe9b5dba58189dbbc, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND0(4, 0x3956c25bf348b538, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND0(5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND0(6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND0(7, 0xab1c5ed5da6d8118, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND0(8, 0xd807aa98a3030242, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND0(9, 0x12835b0145706fbe, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND0(15, 0xc19bf174cf692694, R9, R10, R11, R12, R13, R14, R15, R8)
-
- SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(23, 0x76f988da831153b5, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(24, 0x983e5152ee66dfab, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(31, 0x142929670a0e6e70, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(32, 0x27b70a8546d22ffc, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(39, 0x92722c851482353b, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(47, 0x106aa07032bbd1b8, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(56, 0x748f82ee5defb2fc, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(63, 0xc67178f2e372532b, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(64, 0xca273eceea26619c, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(71, 0x1b710b35131c471b, R9, R10, R11, R12, R13, R14, R15, R8)
- SHA512ROUND1(72, 0x28db77f523047d84, R8, R9, R10, R11, R12, R13, R14, R15)
- SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8, R9, R10, R11, R12, R13, R14)
- SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8, R9, R10, R11, R12, R13)
- SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8, R9, R10, R11, R12)
- SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8, R9, R10, R11)
- SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8, R9, R10)
- SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8, R9)
- SHA512ROUND1(79, 0x6c44198c4a475817, R9, R10, R11, R12, R13, R14, R15, R8)
-
- MOVQ dig+0(FP), BP
- ADDQ (0*8)(BP), R8 // H0 = a + H0
- MOVQ R8, (0*8)(BP)
- ADDQ (1*8)(BP), R9 // H1 = b + H1
- MOVQ R9, (1*8)(BP)
- ADDQ (2*8)(BP), R10 // H2 = c + H2
- MOVQ R10, (2*8)(BP)
- ADDQ (3*8)(BP), R11 // H3 = d + H3
- MOVQ R11, (3*8)(BP)
- ADDQ (4*8)(BP), R12 // H4 = e + H4
- MOVQ R12, (4*8)(BP)
- ADDQ (5*8)(BP), R13 // H5 = f + H5
- MOVQ R13, (5*8)(BP)
- ADDQ (6*8)(BP), R14 // H6 = g + H6
- MOVQ R14, (6*8)(BP)
- ADDQ (7*8)(BP), R15 // H7 = h + H7
- MOVQ R15, (7*8)(BP)
-
- ADDQ $128, SI
- CMPQ SI, 640(SP)
- JB loop
+ MOVQ SP, BP
+ MOVQ (SI), AX
+ BSWAPQ AX
+ MOVQ AX, (BP)
+ MOVQ $0x428a2f98d728ae22, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 8(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 8(BP)
+ MOVQ $0x7137449123ef65cd, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 16(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 16(BP)
+ MOVQ $0xb5c0fbcfec4d3b2f, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 24(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 24(BP)
+ MOVQ $0xe9b5dba58189dbbc, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 32(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 32(BP)
+ MOVQ $0x3956c25bf348b538, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 40(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 40(BP)
+ MOVQ $0x59f111f1b605d019, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 48(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 48(BP)
+ MOVQ $0x923f82a4af194f9b, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 56(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 56(BP)
+ MOVQ $0xab1c5ed5da6d8118, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 64(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 64(BP)
+ MOVQ $0xd807aa98a3030242, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 72(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 72(BP)
+ MOVQ $0x12835b0145706fbe, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 80(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 80(BP)
+ MOVQ $0x243185be4ee4b28c, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 88(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 88(BP)
+ MOVQ $0x550c7dc3d5ffb4e2, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 96(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 96(BP)
+ MOVQ $0x72be5d74f27b896f, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 104(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 104(BP)
+ MOVQ $0x80deb1fe3b1696b1, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 112(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 112(BP)
+ MOVQ $0x9bdc06a725c71235, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 120(SI), AX
+ BSWAPQ AX
+ MOVQ AX, 120(BP)
+ MOVQ $0xc19bf174cf692694, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 112(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 8(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 72(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ (BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 128(BP)
+ MOVQ $0xe49b69c19ef14ad2, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 120(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 16(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 80(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 8(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 136(BP)
+ MOVQ $0xefbe4786384f25e3, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 128(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 24(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 88(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 16(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 144(BP)
+ MOVQ $0x0fc19dc68b8cd5b5, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 136(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 32(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 96(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 24(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 152(BP)
+ MOVQ $0x240ca1cc77ac9c65, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 144(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 40(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 104(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 32(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 160(BP)
+ MOVQ $0x2de92c6f592b0275, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 152(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 48(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 112(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 40(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 168(BP)
+ MOVQ $0x4a7484aa6ea6e483, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 160(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 56(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 120(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 48(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 176(BP)
+ MOVQ $0x5cb0a9dcbd41fbd4, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 168(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 64(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 128(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 56(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 184(BP)
+ MOVQ $0x76f988da831153b5, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 176(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 72(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 136(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 64(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 192(BP)
+ MOVQ $0x983e5152ee66dfab, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 184(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 80(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 144(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 72(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 200(BP)
+ MOVQ $0xa831c66d2db43210, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 192(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 88(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 152(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 80(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 208(BP)
+ MOVQ $0xb00327c898fb213f, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 200(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 96(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 160(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 88(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 216(BP)
+ MOVQ $0xbf597fc7beef0ee4, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 208(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 104(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 168(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 96(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 224(BP)
+ MOVQ $0xc6e00bf33da88fc2, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 216(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 112(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 176(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 104(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 232(BP)
+ MOVQ $0xd5a79147930aa725, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 224(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 120(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 184(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 112(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 240(BP)
+ MOVQ $0x06ca6351e003826f, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 232(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 128(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 192(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 120(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 248(BP)
+ MOVQ $0x142929670a0e6e70, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 240(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 136(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 200(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 128(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 256(BP)
+ MOVQ $0x27b70a8546d22ffc, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 248(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 144(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 208(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 136(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 264(BP)
+ MOVQ $0x2e1b21385c26c926, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 256(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 152(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 216(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 144(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 272(BP)
+ MOVQ $0x4d2c6dfc5ac42aed, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 264(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 160(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 224(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 152(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 280(BP)
+ MOVQ $0x53380d139d95b3df, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 272(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 168(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 232(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 160(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 288(BP)
+ MOVQ $0x650a73548baf63de, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 280(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 176(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 240(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 168(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 296(BP)
+ MOVQ $0x766a0abb3c77b2a8, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 288(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 184(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 248(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 176(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 304(BP)
+ MOVQ $0x81c2c92e47edaee6, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 296(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 192(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 256(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 184(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 312(BP)
+ MOVQ $0x92722c851482353b, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 304(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 200(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 264(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 192(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 320(BP)
+ MOVQ $0xa2bfe8a14cf10364, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 312(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 208(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 272(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 200(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 328(BP)
+ MOVQ $0xa81a664bbc423001, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 320(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 216(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 280(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 208(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 336(BP)
+ MOVQ $0xc24b8b70d0f89791, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 328(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 224(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 288(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 216(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 344(BP)
+ MOVQ $0xc76c51a30654be30, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 336(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 232(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 296(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 224(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 352(BP)
+ MOVQ $0xd192e819d6ef5218, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 344(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 240(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 304(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 232(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 360(BP)
+ MOVQ $0xd69906245565a910, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 352(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 248(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 312(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 240(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 368(BP)
+ MOVQ $0xf40e35855771202a, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 360(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 256(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 320(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 248(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 376(BP)
+ MOVQ $0x106aa07032bbd1b8, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 368(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 264(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 328(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 256(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 384(BP)
+ MOVQ $0x19a4c116b8d2d0c8, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 376(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 272(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 336(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 264(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 392(BP)
+ MOVQ $0x1e376c085141ab53, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 384(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 280(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 344(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 272(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 400(BP)
+ MOVQ $0x2748774cdf8eeb99, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 392(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 288(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 352(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 280(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 408(BP)
+ MOVQ $0x34b0bcb5e19b48a8, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 400(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 296(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 360(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 288(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 416(BP)
+ MOVQ $0x391c0cb3c5c95a63, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 408(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 304(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 368(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 296(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 424(BP)
+ MOVQ $0x4ed8aa4ae3418acb, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 416(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 312(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 376(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 304(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 432(BP)
+ MOVQ $0x5b9cca4f7763e373, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 424(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 320(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 384(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 312(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 440(BP)
+ MOVQ $0x682e6ff3d6b2b8a3, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 432(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 328(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 392(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 320(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 448(BP)
+ MOVQ $0x748f82ee5defb2fc, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 440(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 336(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 400(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 328(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 456(BP)
+ MOVQ $0x78a5636f43172f60, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 448(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 344(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 408(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 336(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 464(BP)
+ MOVQ $0x84c87814a1f0ab72, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 456(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 352(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 416(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 344(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 472(BP)
+ MOVQ $0x8cc702081a6439ec, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 464(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 360(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 424(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 352(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 480(BP)
+ MOVQ $0x90befffa23631e28, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 472(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 368(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 432(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 360(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 488(BP)
+ MOVQ $0xa4506cebde82bde9, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 480(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 376(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 440(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 368(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 496(BP)
+ MOVQ $0xbef9a3f7b2c67915, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 488(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 384(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 448(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 376(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 504(BP)
+ MOVQ $0xc67178f2e372532b, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 496(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 392(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 456(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 384(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 512(BP)
+ MOVQ $0xca273eceea26619c, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 504(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 400(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 464(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 392(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 520(BP)
+ MOVQ $0xd186b8c721c0c207, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 512(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 408(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 472(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 400(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 528(BP)
+ MOVQ $0xeada7dd6cde0eb1e, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 520(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 416(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 480(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 408(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 536(BP)
+ MOVQ $0xf57d4f7fee6ed178, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 528(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 424(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 488(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 416(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 544(BP)
+ MOVQ $0x06f067aa72176fba, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 536(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 432(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 496(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 424(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 552(BP)
+ MOVQ $0x0a637dc5a2c898a6, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 544(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 440(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 504(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 432(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 560(BP)
+ MOVQ $0x113f9804bef90dae, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 552(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 448(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 512(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 440(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 568(BP)
+ MOVQ $0x1b710b35131c471b, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ 560(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 456(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 520(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 448(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 576(BP)
+ MOVQ $0x28db77f523047d84, DX
+ ADDQ AX, R15
+ MOVQ R12, AX
+ ADDQ DX, R15
+ MOVQ R12, CX
+ RORQ $0x0e, AX
+ MOVQ R12, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R12, CX
+ RORQ $0x29, DX
+ ANDQ R13, CX
+ XORQ AX, DX
+ MOVQ R12, AX
+ NOTQ AX
+ ADDQ DX, R15
+ ANDQ R14, AX
+ XORQ CX, AX
+ ADDQ R15, AX
+ MOVQ R8, DI
+ MOVQ R10, BX
+ RORQ $0x1c, DI
+ MOVQ R8, DX
+ ANDQ R9, BX
+ RORQ $0x22, DX
+ MOVQ R8, CX
+ ANDQ R10, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R8, DX
+ MOVQ R9, CX
+ RORQ $0x27, DX
+ ANDQ R8, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R15
+ ADDQ AX, R11
+ ADDQ AX, R15
+ MOVQ 568(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 464(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 528(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 456(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 584(BP)
+ MOVQ $0x32caab7b40c72493, DX
+ ADDQ AX, R14
+ MOVQ R11, AX
+ ADDQ DX, R14
+ MOVQ R11, CX
+ RORQ $0x0e, AX
+ MOVQ R11, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R11, CX
+ RORQ $0x29, DX
+ ANDQ R12, CX
+ XORQ AX, DX
+ MOVQ R11, AX
+ NOTQ AX
+ ADDQ DX, R14
+ ANDQ R13, AX
+ XORQ CX, AX
+ ADDQ R14, AX
+ MOVQ R15, DI
+ MOVQ R9, BX
+ RORQ $0x1c, DI
+ MOVQ R15, DX
+ ANDQ R8, BX
+ RORQ $0x22, DX
+ MOVQ R15, CX
+ ANDQ R9, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R15, DX
+ MOVQ R8, CX
+ RORQ $0x27, DX
+ ANDQ R15, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R14
+ ADDQ AX, R10
+ ADDQ AX, R14
+ MOVQ 576(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 472(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 536(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 464(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 592(BP)
+ MOVQ $0x3c9ebe0a15c9bebc, DX
+ ADDQ AX, R13
+ MOVQ R10, AX
+ ADDQ DX, R13
+ MOVQ R10, CX
+ RORQ $0x0e, AX
+ MOVQ R10, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R10, CX
+ RORQ $0x29, DX
+ ANDQ R11, CX
+ XORQ AX, DX
+ MOVQ R10, AX
+ NOTQ AX
+ ADDQ DX, R13
+ ANDQ R12, AX
+ XORQ CX, AX
+ ADDQ R13, AX
+ MOVQ R14, DI
+ MOVQ R8, BX
+ RORQ $0x1c, DI
+ MOVQ R14, DX
+ ANDQ R15, BX
+ RORQ $0x22, DX
+ MOVQ R14, CX
+ ANDQ R8, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R14, DX
+ MOVQ R15, CX
+ RORQ $0x27, DX
+ ANDQ R14, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R13
+ ADDQ AX, R9
+ ADDQ AX, R13
+ MOVQ 584(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 480(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 544(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 472(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 600(BP)
+ MOVQ $0x431d67c49c100d4c, DX
+ ADDQ AX, R12
+ MOVQ R9, AX
+ ADDQ DX, R12
+ MOVQ R9, CX
+ RORQ $0x0e, AX
+ MOVQ R9, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R9, CX
+ RORQ $0x29, DX
+ ANDQ R10, CX
+ XORQ AX, DX
+ MOVQ R9, AX
+ NOTQ AX
+ ADDQ DX, R12
+ ANDQ R11, AX
+ XORQ CX, AX
+ ADDQ R12, AX
+ MOVQ R13, DI
+ MOVQ R15, BX
+ RORQ $0x1c, DI
+ MOVQ R13, DX
+ ANDQ R14, BX
+ RORQ $0x22, DX
+ MOVQ R13, CX
+ ANDQ R15, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R13, DX
+ MOVQ R14, CX
+ RORQ $0x27, DX
+ ANDQ R13, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R12
+ ADDQ AX, R8
+ ADDQ AX, R12
+ MOVQ 592(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 488(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 552(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 480(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 608(BP)
+ MOVQ $0x4cc5d4becb3e42b6, DX
+ ADDQ AX, R11
+ MOVQ R8, AX
+ ADDQ DX, R11
+ MOVQ R8, CX
+ RORQ $0x0e, AX
+ MOVQ R8, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R8, CX
+ RORQ $0x29, DX
+ ANDQ R9, CX
+ XORQ AX, DX
+ MOVQ R8, AX
+ NOTQ AX
+ ADDQ DX, R11
+ ANDQ R10, AX
+ XORQ CX, AX
+ ADDQ R11, AX
+ MOVQ R12, DI
+ MOVQ R14, BX
+ RORQ $0x1c, DI
+ MOVQ R12, DX
+ ANDQ R13, BX
+ RORQ $0x22, DX
+ MOVQ R12, CX
+ ANDQ R14, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R12, DX
+ MOVQ R13, CX
+ RORQ $0x27, DX
+ ANDQ R12, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R11
+ ADDQ AX, R15
+ ADDQ AX, R11
+ MOVQ 600(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 496(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 560(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 488(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 616(BP)
+ MOVQ $0x597f299cfc657e2a, DX
+ ADDQ AX, R10
+ MOVQ R15, AX
+ ADDQ DX, R10
+ MOVQ R15, CX
+ RORQ $0x0e, AX
+ MOVQ R15, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R15, CX
+ RORQ $0x29, DX
+ ANDQ R8, CX
+ XORQ AX, DX
+ MOVQ R15, AX
+ NOTQ AX
+ ADDQ DX, R10
+ ANDQ R9, AX
+ XORQ CX, AX
+ ADDQ R10, AX
+ MOVQ R11, DI
+ MOVQ R13, BX
+ RORQ $0x1c, DI
+ MOVQ R11, DX
+ ANDQ R12, BX
+ RORQ $0x22, DX
+ MOVQ R11, CX
+ ANDQ R13, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R11, DX
+ MOVQ R12, CX
+ RORQ $0x27, DX
+ ANDQ R11, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R10
+ ADDQ AX, R14
+ ADDQ AX, R10
+ MOVQ 608(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 504(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 568(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 496(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 624(BP)
+ MOVQ $0x5fcb6fab3ad6faec, DX
+ ADDQ AX, R9
+ MOVQ R14, AX
+ ADDQ DX, R9
+ MOVQ R14, CX
+ RORQ $0x0e, AX
+ MOVQ R14, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R14, CX
+ RORQ $0x29, DX
+ ANDQ R15, CX
+ XORQ AX, DX
+ MOVQ R14, AX
+ NOTQ AX
+ ADDQ DX, R9
+ ANDQ R8, AX
+ XORQ CX, AX
+ ADDQ R9, AX
+ MOVQ R10, DI
+ MOVQ R12, BX
+ RORQ $0x1c, DI
+ MOVQ R10, DX
+ ANDQ R11, BX
+ RORQ $0x22, DX
+ MOVQ R10, CX
+ ANDQ R12, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R10, DX
+ MOVQ R11, CX
+ RORQ $0x27, DX
+ ANDQ R10, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R9
+ ADDQ AX, R13
+ ADDQ AX, R9
+ MOVQ 616(BP), AX
+ MOVQ AX, CX
+ RORQ $0x13, AX
+ MOVQ CX, DX
+ RORQ $0x3d, CX
+ SHRQ $0x06, DX
+ MOVQ 512(BP), BX
+ XORQ CX, AX
+ MOVQ BX, CX
+ XORQ DX, AX
+ RORQ $0x01, BX
+ MOVQ CX, DX
+ SHRQ $0x07, DX
+ RORQ $0x08, CX
+ ADDQ 576(BP), AX
+ XORQ CX, BX
+ XORQ DX, BX
+ ADDQ 504(BP), BX
+ ADDQ BX, AX
+ MOVQ AX, 632(BP)
+ MOVQ $0x6c44198c4a475817, DX
+ ADDQ AX, R8
+ MOVQ R13, AX
+ ADDQ DX, R8
+ MOVQ R13, CX
+ RORQ $0x0e, AX
+ MOVQ R13, DX
+ RORQ $0x12, CX
+ XORQ CX, AX
+ MOVQ R13, CX
+ RORQ $0x29, DX
+ ANDQ R14, CX
+ XORQ AX, DX
+ MOVQ R13, AX
+ NOTQ AX
+ ADDQ DX, R8
+ ANDQ R15, AX
+ XORQ CX, AX
+ ADDQ R8, AX
+ MOVQ R9, DI
+ MOVQ R11, BX
+ RORQ $0x1c, DI
+ MOVQ R9, DX
+ ANDQ R10, BX
+ RORQ $0x22, DX
+ MOVQ R9, CX
+ ANDQ R11, CX
+ XORQ DX, DI
+ XORQ CX, BX
+ MOVQ R9, DX
+ MOVQ R10, CX
+ RORQ $0x27, DX
+ ANDQ R9, CX
+ XORQ CX, BX
+ XORQ DX, DI
+ ADDQ DI, BX
+ MOVQ BX, R8
+ ADDQ AX, R12
+ ADDQ AX, R8
+ MOVQ dig+0(FP), BP
+ ADDQ (BP), R8
+ MOVQ R8, (BP)
+ ADDQ 8(BP), R9
+ MOVQ R9, 8(BP)
+ ADDQ 16(BP), R10
+ MOVQ R10, 16(BP)
+ ADDQ 24(BP), R11
+ MOVQ R11, 24(BP)
+ ADDQ 32(BP), R12
+ MOVQ R12, 32(BP)
+ ADDQ 40(BP), R13
+ MOVQ R13, 40(BP)
+ ADDQ 48(BP), R14
+ MOVQ R14, 48(BP)
+ ADDQ 56(BP), R15
+ MOVQ R15, 56(BP)
+ ADDQ $0x80, SI
+ CMPQ SI, 640(SP)
+ JB loop
end:
RET
-// Version below is based on "Fast SHA512 Implementations on Intel
-// Architecture Processors" White-paper
-// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-sha512-implementations-ia-processors-paper.pdf
-// AVX2 version by Intel, same algorithm in Linux kernel:
-// https://github.com/torvalds/linux/blob/master/arch/x86/crypto/sha512-avx2-asm.S
-
-// James Guilford <james.guilford@intel.com>
-// Kirk Yap <kirk.s.yap@intel.com>
-// Tim Chen <tim.c.chen@linux.intel.com>
-// David Cote <david.m.cote@intel.com>
-// Aleksey Sidorov <aleksey.sidorov@intel.com>
-
-#define YFER_SIZE (4*8)
-#define SRND_SIZE (1*8)
-#define INP_SIZE (1*8)
-
-#define frame_YFER (0)
-#define frame_SRND (frame_YFER + YFER_SIZE)
-#define frame_INP (frame_SRND + SRND_SIZE)
-#define frame_INPEND (frame_INP + INP_SIZE)
-
-#define addm(p1, p2) \
- ADDQ p1, p2; \
- MOVQ p2, p1
-
-#define COPY_YMM_AND_BSWAP(p1, p2, p3) \
- VMOVDQU p2, p1; \
- VPSHUFB p3, p1, p1
-
-#define MY_VPALIGNR(YDST, YSRC1, YSRC2, RVAL) \
- VPERM2F128 $0x3, YSRC2, YSRC1, YDST; \
- VPALIGNR $RVAL, YSRC2, YDST, YDST
-
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x00(SB)/8, $0x0001020304050607
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x08(SB)/8, $0x08090a0b0c0d0e0f
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x10(SB)/8, $0x1011121314151617
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x18(SB)/8, $0x18191a1b1c1d1e1f
-
-GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), (NOPTR+RODATA), $32
-
-DATA MASK_YMM_LO<>+0x00(SB)/8, $0x0000000000000000
-DATA MASK_YMM_LO<>+0x08(SB)/8, $0x0000000000000000
-DATA MASK_YMM_LO<>+0x10(SB)/8, $0xFFFFFFFFFFFFFFFF
-DATA MASK_YMM_LO<>+0x18(SB)/8, $0xFFFFFFFFFFFFFFFF
-
-GLOBL MASK_YMM_LO<>(SB), (NOPTR+RODATA), $32
+DATA PSHUFFLE_BYTE_FLIP_MASK<>+0(SB)/8, $0x0001020304050607 // VPSHUFB control bytes 0-7: stored little-endian they read 7,6,...,0, reversing the first quadword of the low 128-bit lane
+DATA PSHUFFLE_BYTE_FLIP_MASK<>+8(SB)/8, $0x08090a0b0c0d0e0f // control bytes 8-15: read 15,14,...,8, reversing the second quadword of the low lane
+DATA PSHUFFLE_BYTE_FLIP_MASK<>+16(SB)/8, $0x1011121314151617 // control bytes 16-23: same pattern for the high lane (VPSHUFB indexes within each 128-bit lane using the low 4 bits)
+DATA PSHUFFLE_BYTE_FLIP_MASK<>+24(SB)/8, $0x18191a1b1c1d1e1f // control bytes 24-31: reverses the second quadword of the high lane
+GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free table; blockAVX2 loads it into Y9 and applies it with VPSHUFB to each 32-byte load from the input
+// func blockAVX2(dig *digest, p []byte)
+// Requires: AVX, AVX2, BMI2
TEXT ·blockAVX2(SB), NOSPLIT, $56-32
- MOVQ dig+0(FP), SI
- MOVQ p_base+8(FP), DI
- MOVQ p_len+16(FP), DX
-
- SHRQ $7, DX
- SHLQ $7, DX
-
- JZ done_hash
- ADDQ DI, DX
- MOVQ DX, frame_INPEND(SP)
-
- MOVQ (0*8)(SI), AX
- MOVQ (1*8)(SI), BX
- MOVQ (2*8)(SI), CX
- MOVQ (3*8)(SI), R8
- MOVQ (4*8)(SI), DX
- MOVQ (5*8)(SI), R9
- MOVQ (6*8)(SI), R10
- MOVQ (7*8)(SI), R11
-
- VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>(SB), Y9
+ MOVQ dig+0(FP), SI
+ MOVQ p_base+8(FP), DI
+ MOVQ p_len+16(FP), DX
+ SHRQ $0x07, DX
+ SHLQ $0x07, DX
+ JZ done_hash
+ ADDQ DI, DX
+ MOVQ DX, 48(SP)
+ MOVQ (SI), AX
+ MOVQ 8(SI), BX
+ MOVQ 16(SI), CX
+ MOVQ 24(SI), R8
+ MOVQ 32(SI), DX
+ MOVQ 40(SI), R9
+ MOVQ 48(SI), R10
+ MOVQ 56(SI), R11
+ VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>+0(SB), Y9
loop0:
- MOVQ ·_K+0(SB), BP
-
- // byte swap first 16 dwords
- COPY_YMM_AND_BSWAP(Y4, (0*32)(DI), Y9)
- COPY_YMM_AND_BSWAP(Y5, (1*32)(DI), Y9)
- COPY_YMM_AND_BSWAP(Y6, (2*32)(DI), Y9)
- COPY_YMM_AND_BSWAP(Y7, (3*32)(DI), Y9)
-
- MOVQ DI, frame_INP(SP)
-
- // schedule 64 input dwords, by doing 12 rounds of 4 each
- MOVQ $4, frame_SRND(SP)
+ MOVQ ·_K+0(SB), BP
+ VMOVDQU (DI), Y4
+ VPSHUFB Y9, Y4, Y4
+ VMOVDQU 32(DI), Y5
+ VPSHUFB Y9, Y5, Y5
+ VMOVDQU 64(DI), Y6
+ VPSHUFB Y9, Y6, Y6
+ VMOVDQU 96(DI), Y7
+ VPSHUFB Y9, Y7, Y7
+ MOVQ DI, 40(SP)
+ MOVQ $0x00000004, 32(SP)
loop1:
- VPADDQ (BP), Y4, Y0
- VMOVDQU Y0, frame_YFER(SP)
-
- MY_VPALIGNR(Y0, Y7, Y6, 8)
-
- VPADDQ Y4, Y0, Y0
-
- MY_VPALIGNR(Y1, Y5, Y4, 8)
-
- VPSRLQ $1, Y1, Y2
- VPSLLQ $(64-1), Y1, Y3
- VPOR Y2, Y3, Y3
-
- VPSRLQ $7, Y1, Y8
-
- MOVQ AX, DI
- RORXQ $41, DX, R13
- RORXQ $18, DX, R14
- ADDQ frame_YFER(SP), R11
- ORQ CX, DI
- MOVQ R9, R15
- RORXQ $34, AX, R12
-
- XORQ R14, R13
- XORQ R10, R15
- RORXQ $14, DX, R14
-
- ANDQ DX, R15
- XORQ R14, R13
- RORXQ $39, AX, R14
- ADDQ R11, R8
-
- ANDQ BX, DI
- XORQ R12, R14
- RORXQ $28, AX, R12
-
- XORQ R10, R15
- XORQ R12, R14
- MOVQ AX, R12
- ANDQ CX, R12
-
- ADDQ R13, R15
- ORQ R12, DI
- ADDQ R14, R11
-
- ADDQ R15, R8
-
- ADDQ R15, R11
- ADDQ DI, R11
-
- VPSRLQ $8, Y1, Y2
- VPSLLQ $(64-8), Y1, Y1
- VPOR Y2, Y1, Y1
-
- VPXOR Y8, Y3, Y3
- VPXOR Y1, Y3, Y1
-
- VPADDQ Y1, Y0, Y0
-
- VPERM2F128 $0x0, Y0, Y0, Y4
-
- VPAND MASK_YMM_LO<>(SB), Y0, Y0
-
+ VPADDQ (BP), Y4, Y0
+ VMOVDQU Y0, (SP)
+ VPERM2F128 $0x03, Y6, Y7, Y0
+ VPALIGNR $0x08, Y6, Y0, Y0
+ VPADDQ Y4, Y0, Y0
+ VPERM2F128 $0x03, Y4, Y5, Y1
+ VPALIGNR $0x08, Y4, Y1, Y1
+ VPSRLQ $0x01, Y1, Y2
+ VPSLLQ $0x3f, Y1, Y3
+ VPOR Y2, Y3, Y3
+ VPSRLQ $0x07, Y1, Y8
+ MOVQ AX, DI
+ RORXQ $0x29, DX, R13
+ RORXQ $0x12, DX, R14
+ ADDQ (SP), R11
+ ORQ CX, DI
+ MOVQ R9, R15
+ RORXQ $0x22, AX, R12
+ XORQ R14, R13
+ XORQ R10, R15
+ RORXQ $0x0e, DX, R14
+ ANDQ DX, R15
+ XORQ R14, R13
+ RORXQ $0x27, AX, R14
+ ADDQ R11, R8
+ ANDQ BX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, AX, R12
+ XORQ R10, R15
+ XORQ R12, R14
+ MOVQ AX, R12
+ ANDQ CX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R11
+ ADDQ R15, R8
+ ADDQ R15, R11
+ ADDQ DI, R11
+ VPSRLQ $0x08, Y1, Y2
+ VPSLLQ $0x38, Y1, Y1
+ VPOR Y2, Y1, Y1
+ VPXOR Y8, Y3, Y3
+ VPXOR Y1, Y3, Y1
+ VPADDQ Y1, Y0, Y0
+ VPERM2F128 $0x00, Y0, Y0, Y4
+ VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y7, Y7, Y2
- VPSRLQ $6, Y2, Y8
-
- MOVQ R11, DI
- RORXQ $41, R8, R13
- RORXQ $18, R8, R14
- ADDQ 1*8+frame_YFER(SP), R10
- ORQ BX, DI
-
- MOVQ DX, R15
- RORXQ $34, R11, R12
- XORQ R14, R13
- XORQ R9, R15
-
- RORXQ $14, R8, R14
- XORQ R14, R13
- RORXQ $39, R11, R14
- ANDQ R8, R15
- ADDQ R10, CX
-
- ANDQ AX, DI
- XORQ R12, R14
-
- RORXQ $28, R11, R12
- XORQ R9, R15
-
- XORQ R12, R14
- MOVQ R11, R12
- ANDQ BX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, R10
-
- ADDQ R15, CX
- ADDQ R15, R10
- ADDQ DI, R10
-
- VPSRLQ $19, Y2, Y3
- VPSLLQ $(64-19), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y2, Y3
- VPSLLQ $(64-61), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y4, Y4
-
- VPSRLQ $6, Y4, Y8
-
- MOVQ R10, DI
- RORXQ $41, CX, R13
- ADDQ 2*8+frame_YFER(SP), R9
-
- RORXQ $18, CX, R14
- ORQ AX, DI
- MOVQ R8, R15
- XORQ DX, R15
-
- RORXQ $34, R10, R12
- XORQ R14, R13
- ANDQ CX, R15
-
- RORXQ $14, CX, R14
- ADDQ R9, BX
- ANDQ R11, DI
-
- XORQ R14, R13
- RORXQ $39, R10, R14
- XORQ DX, R15
-
- XORQ R12, R14
- RORXQ $28, R10, R12
-
- XORQ R12, R14
- MOVQ R10, R12
- ANDQ AX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, R9
- ADDQ R15, BX
- ADDQ R15, R9
-
- ADDQ DI, R9
-
- VPSRLQ $19, Y4, Y3
- VPSLLQ $(64-19), Y4, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y4, Y3
- VPSLLQ $(64-61), Y4, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y0, Y2
-
- VPBLENDD $0xF0, Y2, Y4, Y4
-
- MOVQ R9, DI
- RORXQ $41, BX, R13
- RORXQ $18, BX, R14
- ADDQ 3*8+frame_YFER(SP), DX
- ORQ R11, DI
-
- MOVQ CX, R15
- RORXQ $34, R9, R12
- XORQ R14, R13
- XORQ R8, R15
-
- RORXQ $14, BX, R14
- ANDQ BX, R15
- ADDQ DX, AX
- ANDQ R10, DI
-
- XORQ R14, R13
- XORQ R8, R15
-
- RORXQ $39, R9, R14
- ADDQ R13, R15
-
- XORQ R12, R14
- ADDQ R15, AX
-
- RORXQ $28, R9, R12
-
- XORQ R12, R14
- MOVQ R9, R12
- ANDQ R11, R12
- ORQ R12, DI
-
- ADDQ R14, DX
- ADDQ R15, DX
- ADDQ DI, DX
-
- VPADDQ 1*32(BP), Y5, Y0
- VMOVDQU Y0, frame_YFER(SP)
-
- MY_VPALIGNR(Y0, Y4, Y7, 8)
-
- VPADDQ Y5, Y0, Y0
-
- MY_VPALIGNR(Y1, Y6, Y5, 8)
-
- VPSRLQ $1, Y1, Y2
- VPSLLQ $(64-1), Y1, Y3
- VPOR Y2, Y3, Y3
-
- VPSRLQ $7, Y1, Y8
-
- MOVQ DX, DI
- RORXQ $41, AX, R13
- RORXQ $18, AX, R14
- ADDQ frame_YFER(SP), R8
- ORQ R10, DI
- MOVQ BX, R15
- RORXQ $34, DX, R12
-
- XORQ R14, R13
- XORQ CX, R15
- RORXQ $14, AX, R14
-
- ANDQ AX, R15
- XORQ R14, R13
- RORXQ $39, DX, R14
- ADDQ R8, R11
-
- ANDQ R9, DI
- XORQ R12, R14
- RORXQ $28, DX, R12
-
- XORQ CX, R15
- XORQ R12, R14
- MOVQ DX, R12
- ANDQ R10, R12
-
- ADDQ R13, R15
- ORQ R12, DI
- ADDQ R14, R8
-
- ADDQ R15, R11
-
- ADDQ R15, R8
- ADDQ DI, R8
-
- VPSRLQ $8, Y1, Y2
- VPSLLQ $(64-8), Y1, Y1
- VPOR Y2, Y1, Y1
-
- VPXOR Y8, Y3, Y3
- VPXOR Y1, Y3, Y1
-
- VPADDQ Y1, Y0, Y0
-
- VPERM2F128 $0x0, Y0, Y0, Y5
-
- VPAND MASK_YMM_LO<>(SB), Y0, Y0
-
+ VPSRLQ $0x06, Y2, Y8
+ MOVQ R11, DI
+ RORXQ $0x29, R8, R13
+ RORXQ $0x12, R8, R14
+ ADDQ 8(SP), R10
+ ORQ BX, DI
+ MOVQ DX, R15
+ RORXQ $0x22, R11, R12
+ XORQ R14, R13
+ XORQ R9, R15
+ RORXQ $0x0e, R8, R14
+ XORQ R14, R13
+ RORXQ $0x27, R11, R14
+ ANDQ R8, R15
+ ADDQ R10, CX
+ ANDQ AX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R11, R12
+ XORQ R9, R15
+ XORQ R12, R14
+ MOVQ R11, R12
+ ANDQ BX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R10
+ ADDQ R15, CX
+ ADDQ R15, R10
+ ADDQ DI, R10
+ VPSRLQ $0x13, Y2, Y3
+ VPSLLQ $0x2d, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y2, Y3
+ VPSLLQ $0x03, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y4, Y4
+ VPSRLQ $0x06, Y4, Y8
+ MOVQ R10, DI
+ RORXQ $0x29, CX, R13
+ ADDQ 16(SP), R9
+ RORXQ $0x12, CX, R14
+ ORQ AX, DI
+ MOVQ R8, R15
+ XORQ DX, R15
+ RORXQ $0x22, R10, R12
+ XORQ R14, R13
+ ANDQ CX, R15
+ RORXQ $0x0e, CX, R14
+ ADDQ R9, BX
+ ANDQ R11, DI
+ XORQ R14, R13
+ RORXQ $0x27, R10, R14
+ XORQ DX, R15
+ XORQ R12, R14
+ RORXQ $0x1c, R10, R12
+ XORQ R12, R14
+ MOVQ R10, R12
+ ANDQ AX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R9
+ ADDQ R15, BX
+ ADDQ R15, R9
+ ADDQ DI, R9
+ VPSRLQ $0x13, Y4, Y3
+ VPSLLQ $0x2d, Y4, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y4, Y3
+ VPSLLQ $0x03, Y4, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y0, Y2
+ VPBLENDD $0xf0, Y2, Y4, Y4
+ MOVQ R9, DI
+ RORXQ $0x29, BX, R13
+ RORXQ $0x12, BX, R14
+ ADDQ 24(SP), DX
+ ORQ R11, DI
+ MOVQ CX, R15
+ RORXQ $0x22, R9, R12
+ XORQ R14, R13
+ XORQ R8, R15
+ RORXQ $0x0e, BX, R14
+ ANDQ BX, R15
+ ADDQ DX, AX
+ ANDQ R10, DI
+ XORQ R14, R13
+ XORQ R8, R15
+ RORXQ $0x27, R9, R14
+ ADDQ R13, R15
+ XORQ R12, R14
+ ADDQ R15, AX
+ RORXQ $0x1c, R9, R12
+ XORQ R12, R14
+ MOVQ R9, R12
+ ANDQ R11, R12
+ ORQ R12, DI
+ ADDQ R14, DX
+ ADDQ R15, DX
+ ADDQ DI, DX
+ VPADDQ 32(BP), Y5, Y0
+ VMOVDQU Y0, (SP)
+ VPERM2F128 $0x03, Y7, Y4, Y0
+ VPALIGNR $0x08, Y7, Y0, Y0
+ VPADDQ Y5, Y0, Y0
+ VPERM2F128 $0x03, Y5, Y6, Y1
+ VPALIGNR $0x08, Y5, Y1, Y1
+ VPSRLQ $0x01, Y1, Y2
+ VPSLLQ $0x3f, Y1, Y3
+ VPOR Y2, Y3, Y3
+ VPSRLQ $0x07, Y1, Y8
+ MOVQ DX, DI
+ RORXQ $0x29, AX, R13
+ RORXQ $0x12, AX, R14
+ ADDQ (SP), R8
+ ORQ R10, DI
+ MOVQ BX, R15
+ RORXQ $0x22, DX, R12
+ XORQ R14, R13
+ XORQ CX, R15
+ RORXQ $0x0e, AX, R14
+ ANDQ AX, R15
+ XORQ R14, R13
+ RORXQ $0x27, DX, R14
+ ADDQ R8, R11
+ ANDQ R9, DI
+ XORQ R12, R14
+ RORXQ $0x1c, DX, R12
+ XORQ CX, R15
+ XORQ R12, R14
+ MOVQ DX, R12
+ ANDQ R10, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R8
+ ADDQ R15, R11
+ ADDQ R15, R8
+ ADDQ DI, R8
+ VPSRLQ $0x08, Y1, Y2
+ VPSLLQ $0x38, Y1, Y1
+ VPOR Y2, Y1, Y1
+ VPXOR Y8, Y3, Y3
+ VPXOR Y1, Y3, Y1
+ VPADDQ Y1, Y0, Y0
+ VPERM2F128 $0x00, Y0, Y0, Y5
+ VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y4, Y4, Y2
- VPSRLQ $6, Y2, Y8
-
- MOVQ R8, DI
- RORXQ $41, R11, R13
- RORXQ $18, R11, R14
- ADDQ 1*8+frame_YFER(SP), CX
- ORQ R9, DI
-
- MOVQ AX, R15
- RORXQ $34, R8, R12
- XORQ R14, R13
- XORQ BX, R15
-
- RORXQ $14, R11, R14
- XORQ R14, R13
- RORXQ $39, R8, R14
- ANDQ R11, R15
- ADDQ CX, R10
-
- ANDQ DX, DI
- XORQ R12, R14
-
- RORXQ $28, R8, R12
- XORQ BX, R15
-
- XORQ R12, R14
- MOVQ R8, R12
- ANDQ R9, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, CX
-
- ADDQ R15, R10
- ADDQ R15, CX
- ADDQ DI, CX
-
- VPSRLQ $19, Y2, Y3
- VPSLLQ $(64-19), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y2, Y3
- VPSLLQ $(64-61), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y5, Y5
-
- VPSRLQ $6, Y5, Y8
-
- MOVQ CX, DI
- RORXQ $41, R10, R13
- ADDQ 2*8+frame_YFER(SP), BX
-
- RORXQ $18, R10, R14
- ORQ DX, DI
- MOVQ R11, R15
- XORQ AX, R15
-
- RORXQ $34, CX, R12
- XORQ R14, R13
- ANDQ R10, R15
-
- RORXQ $14, R10, R14
- ADDQ BX, R9
- ANDQ R8, DI
-
- XORQ R14, R13
- RORXQ $39, CX, R14
- XORQ AX, R15
-
- XORQ R12, R14
- RORXQ $28, CX, R12
-
- XORQ R12, R14
- MOVQ CX, R12
- ANDQ DX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, BX
- ADDQ R15, R9
- ADDQ R15, BX
-
- ADDQ DI, BX
-
- VPSRLQ $19, Y5, Y3
- VPSLLQ $(64-19), Y5, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y5, Y3
- VPSLLQ $(64-61), Y5, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y0, Y2
-
- VPBLENDD $0xF0, Y2, Y5, Y5
-
- MOVQ BX, DI
- RORXQ $41, R9, R13
- RORXQ $18, R9, R14
- ADDQ 3*8+frame_YFER(SP), AX
- ORQ R8, DI
-
- MOVQ R10, R15
- RORXQ $34, BX, R12
- XORQ R14, R13
- XORQ R11, R15
-
- RORXQ $14, R9, R14
- ANDQ R9, R15
- ADDQ AX, DX
- ANDQ CX, DI
-
- XORQ R14, R13
- XORQ R11, R15
-
- RORXQ $39, BX, R14
- ADDQ R13, R15
-
- XORQ R12, R14
- ADDQ R15, DX
-
- RORXQ $28, BX, R12
-
- XORQ R12, R14
- MOVQ BX, R12
- ANDQ R8, R12
- ORQ R12, DI
-
- ADDQ R14, AX
- ADDQ R15, AX
- ADDQ DI, AX
-
- VPADDQ 2*32(BP), Y6, Y0
- VMOVDQU Y0, frame_YFER(SP)
-
- MY_VPALIGNR(Y0, Y5, Y4, 8)
-
- VPADDQ Y6, Y0, Y0
-
- MY_VPALIGNR(Y1, Y7, Y6, 8)
-
- VPSRLQ $1, Y1, Y2
- VPSLLQ $(64-1), Y1, Y3
- VPOR Y2, Y3, Y3
-
- VPSRLQ $7, Y1, Y8
-
- MOVQ AX, DI
- RORXQ $41, DX, R13
- RORXQ $18, DX, R14
- ADDQ frame_YFER(SP), R11
- ORQ CX, DI
- MOVQ R9, R15
- RORXQ $34, AX, R12
-
- XORQ R14, R13
- XORQ R10, R15
- RORXQ $14, DX, R14
-
- ANDQ DX, R15
- XORQ R14, R13
- RORXQ $39, AX, R14
- ADDQ R11, R8
-
- ANDQ BX, DI
- XORQ R12, R14
- RORXQ $28, AX, R12
-
- XORQ R10, R15
- XORQ R12, R14
- MOVQ AX, R12
- ANDQ CX, R12
-
- ADDQ R13, R15
- ORQ R12, DI
- ADDQ R14, R11
-
- ADDQ R15, R8
-
- ADDQ R15, R11
- ADDQ DI, R11
-
- VPSRLQ $8, Y1, Y2
- VPSLLQ $(64-8), Y1, Y1
- VPOR Y2, Y1, Y1
-
- VPXOR Y8, Y3, Y3
- VPXOR Y1, Y3, Y1
-
- VPADDQ Y1, Y0, Y0
-
- VPERM2F128 $0x0, Y0, Y0, Y6
-
- VPAND MASK_YMM_LO<>(SB), Y0, Y0
-
+ VPSRLQ $0x06, Y2, Y8
+ MOVQ R8, DI
+ RORXQ $0x29, R11, R13
+ RORXQ $0x12, R11, R14
+ ADDQ 8(SP), CX
+ ORQ R9, DI
+ MOVQ AX, R15
+ RORXQ $0x22, R8, R12
+ XORQ R14, R13
+ XORQ BX, R15
+ RORXQ $0x0e, R11, R14
+ XORQ R14, R13
+ RORXQ $0x27, R8, R14
+ ANDQ R11, R15
+ ADDQ CX, R10
+ ANDQ DX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R8, R12
+ XORQ BX, R15
+ XORQ R12, R14
+ MOVQ R8, R12
+ ANDQ R9, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, CX
+ ADDQ R15, R10
+ ADDQ R15, CX
+ ADDQ DI, CX
+ VPSRLQ $0x13, Y2, Y3
+ VPSLLQ $0x2d, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y2, Y3
+ VPSLLQ $0x03, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y5, Y5
+ VPSRLQ $0x06, Y5, Y8
+ MOVQ CX, DI
+ RORXQ $0x29, R10, R13
+ ADDQ 16(SP), BX
+ RORXQ $0x12, R10, R14
+ ORQ DX, DI
+ MOVQ R11, R15
+ XORQ AX, R15
+ RORXQ $0x22, CX, R12
+ XORQ R14, R13
+ ANDQ R10, R15
+ RORXQ $0x0e, R10, R14
+ ADDQ BX, R9
+ ANDQ R8, DI
+ XORQ R14, R13
+ RORXQ $0x27, CX, R14
+ XORQ AX, R15
+ XORQ R12, R14
+ RORXQ $0x1c, CX, R12
+ XORQ R12, R14
+ MOVQ CX, R12
+ ANDQ DX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, BX
+ ADDQ R15, R9
+ ADDQ R15, BX
+ ADDQ DI, BX
+ VPSRLQ $0x13, Y5, Y3
+ VPSLLQ $0x2d, Y5, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y5, Y3
+ VPSLLQ $0x03, Y5, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y0, Y2
+ VPBLENDD $0xf0, Y2, Y5, Y5
+ MOVQ BX, DI
+ RORXQ $0x29, R9, R13
+ RORXQ $0x12, R9, R14
+ ADDQ 24(SP), AX
+ ORQ R8, DI
+ MOVQ R10, R15
+ RORXQ $0x22, BX, R12
+ XORQ R14, R13
+ XORQ R11, R15
+ RORXQ $0x0e, R9, R14
+ ANDQ R9, R15
+ ADDQ AX, DX
+ ANDQ CX, DI
+ XORQ R14, R13
+ XORQ R11, R15
+ RORXQ $0x27, BX, R14
+ ADDQ R13, R15
+ XORQ R12, R14
+ ADDQ R15, DX
+ RORXQ $0x1c, BX, R12
+ XORQ R12, R14
+ MOVQ BX, R12
+ ANDQ R8, R12
+ ORQ R12, DI
+ ADDQ R14, AX
+ ADDQ R15, AX
+ ADDQ DI, AX
+ VPADDQ 64(BP), Y6, Y0
+ VMOVDQU Y0, (SP)
+ VPERM2F128 $0x03, Y4, Y5, Y0
+ VPALIGNR $0x08, Y4, Y0, Y0
+ VPADDQ Y6, Y0, Y0
+ VPERM2F128 $0x03, Y6, Y7, Y1
+ VPALIGNR $0x08, Y6, Y1, Y1
+ VPSRLQ $0x01, Y1, Y2
+ VPSLLQ $0x3f, Y1, Y3
+ VPOR Y2, Y3, Y3
+ VPSRLQ $0x07, Y1, Y8
+ MOVQ AX, DI
+ RORXQ $0x29, DX, R13
+ RORXQ $0x12, DX, R14
+ ADDQ (SP), R11
+ ORQ CX, DI
+ MOVQ R9, R15
+ RORXQ $0x22, AX, R12
+ XORQ R14, R13
+ XORQ R10, R15
+ RORXQ $0x0e, DX, R14
+ ANDQ DX, R15
+ XORQ R14, R13
+ RORXQ $0x27, AX, R14
+ ADDQ R11, R8
+ ANDQ BX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, AX, R12
+ XORQ R10, R15
+ XORQ R12, R14
+ MOVQ AX, R12
+ ANDQ CX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R11
+ ADDQ R15, R8
+ ADDQ R15, R11
+ ADDQ DI, R11
+ VPSRLQ $0x08, Y1, Y2
+ VPSLLQ $0x38, Y1, Y1
+ VPOR Y2, Y1, Y1
+ VPXOR Y8, Y3, Y3
+ VPXOR Y1, Y3, Y1
+ VPADDQ Y1, Y0, Y0
+ VPERM2F128 $0x00, Y0, Y0, Y6
+ VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y5, Y5, Y2
- VPSRLQ $6, Y2, Y8
-
- MOVQ R11, DI
- RORXQ $41, R8, R13
- RORXQ $18, R8, R14
- ADDQ 1*8+frame_YFER(SP), R10
- ORQ BX, DI
-
- MOVQ DX, R15
- RORXQ $34, R11, R12
- XORQ R14, R13
- XORQ R9, R15
-
- RORXQ $14, R8, R14
- XORQ R14, R13
- RORXQ $39, R11, R14
- ANDQ R8, R15
- ADDQ R10, CX
-
- ANDQ AX, DI
- XORQ R12, R14
-
- RORXQ $28, R11, R12
- XORQ R9, R15
-
- XORQ R12, R14
- MOVQ R11, R12
- ANDQ BX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, R10
-
- ADDQ R15, CX
- ADDQ R15, R10
- ADDQ DI, R10
-
- VPSRLQ $19, Y2, Y3
- VPSLLQ $(64-19), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y2, Y3
- VPSLLQ $(64-61), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y6, Y6
-
- VPSRLQ $6, Y6, Y8
-
- MOVQ R10, DI
- RORXQ $41, CX, R13
- ADDQ 2*8+frame_YFER(SP), R9
-
- RORXQ $18, CX, R14
- ORQ AX, DI
- MOVQ R8, R15
- XORQ DX, R15
-
- RORXQ $34, R10, R12
- XORQ R14, R13
- ANDQ CX, R15
-
- RORXQ $14, CX, R14
- ADDQ R9, BX
- ANDQ R11, DI
-
- XORQ R14, R13
- RORXQ $39, R10, R14
- XORQ DX, R15
-
- XORQ R12, R14
- RORXQ $28, R10, R12
-
- XORQ R12, R14
- MOVQ R10, R12
- ANDQ AX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, R9
- ADDQ R15, BX
- ADDQ R15, R9
-
- ADDQ DI, R9
-
- VPSRLQ $19, Y6, Y3
- VPSLLQ $(64-19), Y6, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y6, Y3
- VPSLLQ $(64-61), Y6, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y0, Y2
-
- VPBLENDD $0xF0, Y2, Y6, Y6
-
- MOVQ R9, DI
- RORXQ $41, BX, R13
- RORXQ $18, BX, R14
- ADDQ 3*8+frame_YFER(SP), DX
- ORQ R11, DI
-
- MOVQ CX, R15
- RORXQ $34, R9, R12
- XORQ R14, R13
- XORQ R8, R15
-
- RORXQ $14, BX, R14
- ANDQ BX, R15
- ADDQ DX, AX
- ANDQ R10, DI
-
- XORQ R14, R13
- XORQ R8, R15
-
- RORXQ $39, R9, R14
- ADDQ R13, R15
-
- XORQ R12, R14
- ADDQ R15, AX
-
- RORXQ $28, R9, R12
-
- XORQ R12, R14
- MOVQ R9, R12
- ANDQ R11, R12
- ORQ R12, DI
-
- ADDQ R14, DX
- ADDQ R15, DX
- ADDQ DI, DX
-
- VPADDQ 3*32(BP), Y7, Y0
- VMOVDQU Y0, frame_YFER(SP)
- ADDQ $(4*32), BP
-
- MY_VPALIGNR(Y0, Y6, Y5, 8)
-
- VPADDQ Y7, Y0, Y0
-
- MY_VPALIGNR(Y1, Y4, Y7, 8)
-
- VPSRLQ $1, Y1, Y2
- VPSLLQ $(64-1), Y1, Y3
- VPOR Y2, Y3, Y3
-
- VPSRLQ $7, Y1, Y8
-
- MOVQ DX, DI
- RORXQ $41, AX, R13
- RORXQ $18, AX, R14
- ADDQ frame_YFER(SP), R8
- ORQ R10, DI
- MOVQ BX, R15
- RORXQ $34, DX, R12
-
- XORQ R14, R13
- XORQ CX, R15
- RORXQ $14, AX, R14
-
- ANDQ AX, R15
- XORQ R14, R13
- RORXQ $39, DX, R14
- ADDQ R8, R11
-
- ANDQ R9, DI
- XORQ R12, R14
- RORXQ $28, DX, R12
-
- XORQ CX, R15
- XORQ R12, R14
- MOVQ DX, R12
- ANDQ R10, R12
-
- ADDQ R13, R15
- ORQ R12, DI
- ADDQ R14, R8
-
- ADDQ R15, R11
-
- ADDQ R15, R8
- ADDQ DI, R8
-
- VPSRLQ $8, Y1, Y2
- VPSLLQ $(64-8), Y1, Y1
- VPOR Y2, Y1, Y1
-
- VPXOR Y8, Y3, Y3
- VPXOR Y1, Y3, Y1
-
- VPADDQ Y1, Y0, Y0
-
- VPERM2F128 $0x0, Y0, Y0, Y7
-
- VPAND MASK_YMM_LO<>(SB), Y0, Y0
-
+ VPSRLQ $0x06, Y2, Y8
+ MOVQ R11, DI
+ RORXQ $0x29, R8, R13
+ RORXQ $0x12, R8, R14
+ ADDQ 8(SP), R10
+ ORQ BX, DI
+ MOVQ DX, R15
+ RORXQ $0x22, R11, R12
+ XORQ R14, R13
+ XORQ R9, R15
+ RORXQ $0x0e, R8, R14
+ XORQ R14, R13
+ RORXQ $0x27, R11, R14
+ ANDQ R8, R15
+ ADDQ R10, CX
+ ANDQ AX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R11, R12
+ XORQ R9, R15
+ XORQ R12, R14
+ MOVQ R11, R12
+ ANDQ BX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R10
+ ADDQ R15, CX
+ ADDQ R15, R10
+ ADDQ DI, R10
+ VPSRLQ $0x13, Y2, Y3
+ VPSLLQ $0x2d, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y2, Y3
+ VPSLLQ $0x03, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y6, Y6
+ VPSRLQ $0x06, Y6, Y8
+ MOVQ R10, DI
+ RORXQ $0x29, CX, R13
+ ADDQ 16(SP), R9
+ RORXQ $0x12, CX, R14
+ ORQ AX, DI
+ MOVQ R8, R15
+ XORQ DX, R15
+ RORXQ $0x22, R10, R12
+ XORQ R14, R13
+ ANDQ CX, R15
+ RORXQ $0x0e, CX, R14
+ ADDQ R9, BX
+ ANDQ R11, DI
+ XORQ R14, R13
+ RORXQ $0x27, R10, R14
+ XORQ DX, R15
+ XORQ R12, R14
+ RORXQ $0x1c, R10, R12
+ XORQ R12, R14
+ MOVQ R10, R12
+ ANDQ AX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R9
+ ADDQ R15, BX
+ ADDQ R15, R9
+ ADDQ DI, R9
+ VPSRLQ $0x13, Y6, Y3
+ VPSLLQ $0x2d, Y6, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y6, Y3
+ VPSLLQ $0x03, Y6, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y0, Y2
+ VPBLENDD $0xf0, Y2, Y6, Y6
+ MOVQ R9, DI
+ RORXQ $0x29, BX, R13
+ RORXQ $0x12, BX, R14
+ ADDQ 24(SP), DX
+ ORQ R11, DI
+ MOVQ CX, R15
+ RORXQ $0x22, R9, R12
+ XORQ R14, R13
+ XORQ R8, R15
+ RORXQ $0x0e, BX, R14
+ ANDQ BX, R15
+ ADDQ DX, AX
+ ANDQ R10, DI
+ XORQ R14, R13
+ XORQ R8, R15
+ RORXQ $0x27, R9, R14
+ ADDQ R13, R15
+ XORQ R12, R14
+ ADDQ R15, AX
+ RORXQ $0x1c, R9, R12
+ XORQ R12, R14
+ MOVQ R9, R12
+ ANDQ R11, R12
+ ORQ R12, DI
+ ADDQ R14, DX
+ ADDQ R15, DX
+ ADDQ DI, DX
+ VPADDQ 96(BP), Y7, Y0
+ VMOVDQU Y0, (SP)
+ ADDQ $0x80, BP
+ VPERM2F128 $0x03, Y5, Y6, Y0
+ VPALIGNR $0x08, Y5, Y0, Y0
+ VPADDQ Y7, Y0, Y0
+ VPERM2F128 $0x03, Y7, Y4, Y1
+ VPALIGNR $0x08, Y7, Y1, Y1
+ VPSRLQ $0x01, Y1, Y2
+ VPSLLQ $0x3f, Y1, Y3
+ VPOR Y2, Y3, Y3
+ VPSRLQ $0x07, Y1, Y8
+ MOVQ DX, DI
+ RORXQ $0x29, AX, R13
+ RORXQ $0x12, AX, R14
+ ADDQ (SP), R8
+ ORQ R10, DI
+ MOVQ BX, R15
+ RORXQ $0x22, DX, R12
+ XORQ R14, R13
+ XORQ CX, R15
+ RORXQ $0x0e, AX, R14
+ ANDQ AX, R15
+ XORQ R14, R13
+ RORXQ $0x27, DX, R14
+ ADDQ R8, R11
+ ANDQ R9, DI
+ XORQ R12, R14
+ RORXQ $0x1c, DX, R12
+ XORQ CX, R15
+ XORQ R12, R14
+ MOVQ DX, R12
+ ANDQ R10, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, R8
+ ADDQ R15, R11
+ ADDQ R15, R8
+ ADDQ DI, R8
+ VPSRLQ $0x08, Y1, Y2
+ VPSLLQ $0x38, Y1, Y1
+ VPOR Y2, Y1, Y1
+ VPXOR Y8, Y3, Y3
+ VPXOR Y1, Y3, Y1
+ VPADDQ Y1, Y0, Y0
+ VPERM2F128 $0x00, Y0, Y0, Y7
+ VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
VPERM2F128 $0x11, Y6, Y6, Y2
- VPSRLQ $6, Y2, Y8
-
- MOVQ R8, DI
- RORXQ $41, R11, R13
- RORXQ $18, R11, R14
- ADDQ 1*8+frame_YFER(SP), CX
- ORQ R9, DI
-
- MOVQ AX, R15
- RORXQ $34, R8, R12
- XORQ R14, R13
- XORQ BX, R15
-
- RORXQ $14, R11, R14
- XORQ R14, R13
- RORXQ $39, R8, R14
- ANDQ R11, R15
- ADDQ CX, R10
-
- ANDQ DX, DI
- XORQ R12, R14
-
- RORXQ $28, R8, R12
- XORQ BX, R15
-
- XORQ R12, R14
- MOVQ R8, R12
- ANDQ R9, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, CX
-
- ADDQ R15, R10
- ADDQ R15, CX
- ADDQ DI, CX
-
- VPSRLQ $19, Y2, Y3
- VPSLLQ $(64-19), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y2, Y3
- VPSLLQ $(64-61), Y2, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y7, Y7
-
- VPSRLQ $6, Y7, Y8
-
- MOVQ CX, DI
- RORXQ $41, R10, R13
- ADDQ 2*8+frame_YFER(SP), BX
-
- RORXQ $18, R10, R14
- ORQ DX, DI
- MOVQ R11, R15
- XORQ AX, R15
-
- RORXQ $34, CX, R12
- XORQ R14, R13
- ANDQ R10, R15
-
- RORXQ $14, R10, R14
- ADDQ BX, R9
- ANDQ R8, DI
-
- XORQ R14, R13
- RORXQ $39, CX, R14
- XORQ AX, R15
-
- XORQ R12, R14
- RORXQ $28, CX, R12
-
- XORQ R12, R14
- MOVQ CX, R12
- ANDQ DX, R12
- ADDQ R13, R15
-
- ORQ R12, DI
- ADDQ R14, BX
- ADDQ R15, R9
- ADDQ R15, BX
-
- ADDQ DI, BX
-
- VPSRLQ $19, Y7, Y3
- VPSLLQ $(64-19), Y7, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
- VPSRLQ $61, Y7, Y3
- VPSLLQ $(64-61), Y7, Y1
- VPOR Y1, Y3, Y3
- VPXOR Y3, Y8, Y8
-
- VPADDQ Y8, Y0, Y2
-
- VPBLENDD $0xF0, Y2, Y7, Y7
-
- MOVQ BX, DI
- RORXQ $41, R9, R13
- RORXQ $18, R9, R14
- ADDQ 3*8+frame_YFER(SP), AX
- ORQ R8, DI
-
- MOVQ R10, R15
- RORXQ $34, BX, R12
- XORQ R14, R13
- XORQ R11, R15
-
- RORXQ $14, R9, R14
- ANDQ R9, R15
- ADDQ AX, DX
- ANDQ CX, DI
-
- XORQ R14, R13
- XORQ R11, R15
-
- RORXQ $39, BX, R14
- ADDQ R13, R15
-
- XORQ R12, R14
- ADDQ R15, DX
-
- RORXQ $28, BX, R12
-
- XORQ R12, R14
- MOVQ BX, R12
- ANDQ R8, R12
- ORQ R12, DI
-
- ADDQ R14, AX
- ADDQ R15, AX
- ADDQ DI, AX
-
- SUBQ $1, frame_SRND(SP)
- JNE loop1
-
- MOVQ $2, frame_SRND(SP)
+ VPSRLQ $0x06, Y2, Y8
+ MOVQ R8, DI
+ RORXQ $0x29, R11, R13
+ RORXQ $0x12, R11, R14
+ ADDQ 8(SP), CX
+ ORQ R9, DI
+ MOVQ AX, R15
+ RORXQ $0x22, R8, R12
+ XORQ R14, R13
+ XORQ BX, R15
+ RORXQ $0x0e, R11, R14
+ XORQ R14, R13
+ RORXQ $0x27, R8, R14
+ ANDQ R11, R15
+ ADDQ CX, R10
+ ANDQ DX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R8, R12
+ XORQ BX, R15
+ XORQ R12, R14
+ MOVQ R8, R12
+ ANDQ R9, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, CX
+ ADDQ R15, R10
+ ADDQ R15, CX
+ ADDQ DI, CX
+ VPSRLQ $0x13, Y2, Y3
+ VPSLLQ $0x2d, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y2, Y3
+ VPSLLQ $0x03, Y2, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y7, Y7
+ VPSRLQ $0x06, Y7, Y8
+ MOVQ CX, DI
+ RORXQ $0x29, R10, R13
+ ADDQ 16(SP), BX
+ RORXQ $0x12, R10, R14
+ ORQ DX, DI
+ MOVQ R11, R15
+ XORQ AX, R15
+ RORXQ $0x22, CX, R12
+ XORQ R14, R13
+ ANDQ R10, R15
+ RORXQ $0x0e, R10, R14
+ ADDQ BX, R9
+ ANDQ R8, DI
+ XORQ R14, R13
+ RORXQ $0x27, CX, R14
+ XORQ AX, R15
+ XORQ R12, R14
+ RORXQ $0x1c, CX, R12
+ XORQ R12, R14
+ MOVQ CX, R12
+ ANDQ DX, R12
+ ADDQ R13, R15
+ ORQ R12, DI
+ ADDQ R14, BX
+ ADDQ R15, R9
+ ADDQ R15, BX
+ ADDQ DI, BX
+ VPSRLQ $0x13, Y7, Y3
+ VPSLLQ $0x2d, Y7, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPSRLQ $0x3d, Y7, Y3
+ VPSLLQ $0x03, Y7, Y1
+ VPOR Y1, Y3, Y3
+ VPXOR Y3, Y8, Y8
+ VPADDQ Y8, Y0, Y2
+ VPBLENDD $0xf0, Y2, Y7, Y7
+ MOVQ BX, DI
+ RORXQ $0x29, R9, R13
+ RORXQ $0x12, R9, R14
+ ADDQ 24(SP), AX
+ ORQ R8, DI
+ MOVQ R10, R15
+ RORXQ $0x22, BX, R12
+ XORQ R14, R13
+ XORQ R11, R15
+ RORXQ $0x0e, R9, R14
+ ANDQ R9, R15
+ ADDQ AX, DX
+ ANDQ CX, DI
+ XORQ R14, R13
+ XORQ R11, R15
+ RORXQ $0x27, BX, R14
+ ADDQ R13, R15
+ XORQ R12, R14
+ ADDQ R15, DX
+ RORXQ $0x1c, BX, R12
+ XORQ R12, R14
+ MOVQ BX, R12
+ ANDQ R8, R12
+ ORQ R12, DI
+ ADDQ R14, AX
+ ADDQ R15, AX
+ ADDQ DI, AX
+ SUBQ $0x01, 32(SP)
+ JNE loop1
+ MOVQ $0x00000002, 32(SP)
loop2:
VPADDQ (BP), Y4, Y0
- VMOVDQU Y0, frame_YFER(SP)
-
- MOVQ R9, R15
- RORXQ $41, DX, R13
- RORXQ $18, DX, R14
- XORQ R10, R15
-
- XORQ R14, R13
- RORXQ $14, DX, R14
- ANDQ DX, R15
-
- XORQ R14, R13
- RORXQ $34, AX, R12
- XORQ R10, R15
- RORXQ $39, AX, R14
- MOVQ AX, DI
-
- XORQ R12, R14
- RORXQ $28, AX, R12
- ADDQ frame_YFER(SP), R11
- ORQ CX, DI
-
- XORQ R12, R14
- MOVQ AX, R12
- ANDQ BX, DI
- ANDQ CX, R12
- ADDQ R13, R15
-
- ADDQ R11, R8
- ORQ R12, DI
- ADDQ R14, R11
-
- ADDQ R15, R8
-
- ADDQ R15, R11
- MOVQ DX, R15
- RORXQ $41, R8, R13
- RORXQ $18, R8, R14
- XORQ R9, R15
-
- XORQ R14, R13
- RORXQ $14, R8, R14
- ANDQ R8, R15
- ADDQ DI, R11
-
- XORQ R14, R13
- RORXQ $34, R11, R12
- XORQ R9, R15
- RORXQ $39, R11, R14
- MOVQ R11, DI
-
- XORQ R12, R14
- RORXQ $28, R11, R12
- ADDQ 8*1+frame_YFER(SP), R10
- ORQ BX, DI
-
- XORQ R12, R14
- MOVQ R11, R12
- ANDQ AX, DI
- ANDQ BX, R12
- ADDQ R13, R15
-
- ADDQ R10, CX
- ORQ R12, DI
- ADDQ R14, R10
-
- ADDQ R15, CX
-
- ADDQ R15, R10
- MOVQ R8, R15
- RORXQ $41, CX, R13
- RORXQ $18, CX, R14
- XORQ DX, R15
-
- XORQ R14, R13
- RORXQ $14, CX, R14
- ANDQ CX, R15
- ADDQ DI, R10
-
- XORQ R14, R13
- RORXQ $34, R10, R12
- XORQ DX, R15
- RORXQ $39, R10, R14
- MOVQ R10, DI
-
- XORQ R12, R14
- RORXQ $28, R10, R12
- ADDQ 8*2+frame_YFER(SP), R9
- ORQ AX, DI
-
- XORQ R12, R14
- MOVQ R10, R12
- ANDQ R11, DI
- ANDQ AX, R12
- ADDQ R13, R15
-
- ADDQ R9, BX
- ORQ R12, DI
- ADDQ R14, R9
-
- ADDQ R15, BX
-
- ADDQ R15, R9
- MOVQ CX, R15
- RORXQ $41, BX, R13
- RORXQ $18, BX, R14
- XORQ R8, R15
-
- XORQ R14, R13
- RORXQ $14, BX, R14
- ANDQ BX, R15
- ADDQ DI, R9
-
- XORQ R14, R13
- RORXQ $34, R9, R12
- XORQ R8, R15
- RORXQ $39, R9, R14
- MOVQ R9, DI
-
- XORQ R12, R14
- RORXQ $28, R9, R12
- ADDQ 8*3+frame_YFER(SP), DX
- ORQ R11, DI
-
- XORQ R12, R14
- MOVQ R9, R12
- ANDQ R10, DI
- ANDQ R11, R12
- ADDQ R13, R15
-
- ADDQ DX, AX
- ORQ R12, DI
- ADDQ R14, DX
-
- ADDQ R15, AX
-
- ADDQ R15, DX
-
- ADDQ DI, DX
-
- VPADDQ 1*32(BP), Y5, Y0
- VMOVDQU Y0, frame_YFER(SP)
- ADDQ $(2*32), BP
-
- MOVQ BX, R15
- RORXQ $41, AX, R13
- RORXQ $18, AX, R14
- XORQ CX, R15
-
- XORQ R14, R13
- RORXQ $14, AX, R14
- ANDQ AX, R15
-
- XORQ R14, R13
- RORXQ $34, DX, R12
- XORQ CX, R15
- RORXQ $39, DX, R14
- MOVQ DX, DI
-
- XORQ R12, R14
- RORXQ $28, DX, R12
- ADDQ frame_YFER(SP), R8
- ORQ R10, DI
-
- XORQ R12, R14
- MOVQ DX, R12
- ANDQ R9, DI
- ANDQ R10, R12
- ADDQ R13, R15
-
- ADDQ R8, R11
- ORQ R12, DI
- ADDQ R14, R8
-
- ADDQ R15, R11
-
- ADDQ R15, R8
- MOVQ AX, R15
- RORXQ $41, R11, R13
- RORXQ $18, R11, R14
- XORQ BX, R15
-
- XORQ R14, R13
- RORXQ $14, R11, R14
- ANDQ R11, R15
- ADDQ DI, R8
-
- XORQ R14, R13
- RORXQ $34, R8, R12
- XORQ BX, R15
- RORXQ $39, R8, R14
- MOVQ R8, DI
-
- XORQ R12, R14
- RORXQ $28, R8, R12
- ADDQ 8*1+frame_YFER(SP), CX
- ORQ R9, DI
-
- XORQ R12, R14
- MOVQ R8, R12
- ANDQ DX, DI
- ANDQ R9, R12
- ADDQ R13, R15
-
- ADDQ CX, R10
- ORQ R12, DI
- ADDQ R14, CX
-
- ADDQ R15, R10
-
- ADDQ R15, CX
- MOVQ R11, R15
- RORXQ $41, R10, R13
- RORXQ $18, R10, R14
- XORQ AX, R15
-
- XORQ R14, R13
- RORXQ $14, R10, R14
- ANDQ R10, R15
- ADDQ DI, CX
-
- XORQ R14, R13
- RORXQ $34, CX, R12
- XORQ AX, R15
- RORXQ $39, CX, R14
- MOVQ CX, DI
-
- XORQ R12, R14
- RORXQ $28, CX, R12
- ADDQ 8*2+frame_YFER(SP), BX
- ORQ DX, DI
-
- XORQ R12, R14
- MOVQ CX, R12
- ANDQ R8, DI
- ANDQ DX, R12
- ADDQ R13, R15
-
- ADDQ BX, R9
- ORQ R12, DI
- ADDQ R14, BX
-
- ADDQ R15, R9
-
- ADDQ R15, BX
- MOVQ R10, R15
- RORXQ $41, R9, R13
- RORXQ $18, R9, R14
- XORQ R11, R15
-
- XORQ R14, R13
- RORXQ $14, R9, R14
- ANDQ R9, R15
- ADDQ DI, BX
-
- XORQ R14, R13
- RORXQ $34, BX, R12
- XORQ R11, R15
- RORXQ $39, BX, R14
- MOVQ BX, DI
-
- XORQ R12, R14
- RORXQ $28, BX, R12
- ADDQ 8*3+frame_YFER(SP), AX
- ORQ R8, DI
-
- XORQ R12, R14
- MOVQ BX, R12
- ANDQ CX, DI
- ANDQ R8, R12
- ADDQ R13, R15
-
- ADDQ AX, DX
- ORQ R12, DI
- ADDQ R14, AX
-
- ADDQ R15, DX
-
- ADDQ R15, AX
-
- ADDQ DI, AX
-
+ VMOVDQU Y0, (SP)
+ MOVQ R9, R15
+ RORXQ $0x29, DX, R13
+ RORXQ $0x12, DX, R14
+ XORQ R10, R15
+ XORQ R14, R13
+ RORXQ $0x0e, DX, R14
+ ANDQ DX, R15
+ XORQ R14, R13
+ RORXQ $0x22, AX, R12
+ XORQ R10, R15
+ RORXQ $0x27, AX, R14
+ MOVQ AX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, AX, R12
+ ADDQ (SP), R11
+ ORQ CX, DI
+ XORQ R12, R14
+ MOVQ AX, R12
+ ANDQ BX, DI
+ ANDQ CX, R12
+ ADDQ R13, R15
+ ADDQ R11, R8
+ ORQ R12, DI
+ ADDQ R14, R11
+ ADDQ R15, R8
+ ADDQ R15, R11
+ MOVQ DX, R15
+ RORXQ $0x29, R8, R13
+ RORXQ $0x12, R8, R14
+ XORQ R9, R15
+ XORQ R14, R13
+ RORXQ $0x0e, R8, R14
+ ANDQ R8, R15
+ ADDQ DI, R11
+ XORQ R14, R13
+ RORXQ $0x22, R11, R12
+ XORQ R9, R15
+ RORXQ $0x27, R11, R14
+ MOVQ R11, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R11, R12
+ ADDQ 8(SP), R10
+ ORQ BX, DI
+ XORQ R12, R14
+ MOVQ R11, R12
+ ANDQ AX, DI
+ ANDQ BX, R12
+ ADDQ R13, R15
+ ADDQ R10, CX
+ ORQ R12, DI
+ ADDQ R14, R10
+ ADDQ R15, CX
+ ADDQ R15, R10
+ MOVQ R8, R15
+ RORXQ $0x29, CX, R13
+ RORXQ $0x12, CX, R14
+ XORQ DX, R15
+ XORQ R14, R13
+ RORXQ $0x0e, CX, R14
+ ANDQ CX, R15
+ ADDQ DI, R10
+ XORQ R14, R13
+ RORXQ $0x22, R10, R12
+ XORQ DX, R15
+ RORXQ $0x27, R10, R14
+ MOVQ R10, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R10, R12
+ ADDQ 16(SP), R9
+ ORQ AX, DI
+ XORQ R12, R14
+ MOVQ R10, R12
+ ANDQ R11, DI
+ ANDQ AX, R12
+ ADDQ R13, R15
+ ADDQ R9, BX
+ ORQ R12, DI
+ ADDQ R14, R9
+ ADDQ R15, BX
+ ADDQ R15, R9
+ MOVQ CX, R15
+ RORXQ $0x29, BX, R13
+ RORXQ $0x12, BX, R14
+ XORQ R8, R15
+ XORQ R14, R13
+ RORXQ $0x0e, BX, R14
+ ANDQ BX, R15
+ ADDQ DI, R9
+ XORQ R14, R13
+ RORXQ $0x22, R9, R12
+ XORQ R8, R15
+ RORXQ $0x27, R9, R14
+ MOVQ R9, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R9, R12
+ ADDQ 24(SP), DX
+ ORQ R11, DI
+ XORQ R12, R14
+ MOVQ R9, R12
+ ANDQ R10, DI
+ ANDQ R11, R12
+ ADDQ R13, R15
+ ADDQ DX, AX
+ ORQ R12, DI
+ ADDQ R14, DX
+ ADDQ R15, AX
+ ADDQ R15, DX
+ ADDQ DI, DX
+ VPADDQ 32(BP), Y5, Y0
+ VMOVDQU Y0, (SP)
+ ADDQ $0x40, BP
+ MOVQ BX, R15
+ RORXQ $0x29, AX, R13
+ RORXQ $0x12, AX, R14
+ XORQ CX, R15
+ XORQ R14, R13
+ RORXQ $0x0e, AX, R14
+ ANDQ AX, R15
+ XORQ R14, R13
+ RORXQ $0x22, DX, R12
+ XORQ CX, R15
+ RORXQ $0x27, DX, R14
+ MOVQ DX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, DX, R12
+ ADDQ (SP), R8
+ ORQ R10, DI
+ XORQ R12, R14
+ MOVQ DX, R12
+ ANDQ R9, DI
+ ANDQ R10, R12
+ ADDQ R13, R15
+ ADDQ R8, R11
+ ORQ R12, DI
+ ADDQ R14, R8
+ ADDQ R15, R11
+ ADDQ R15, R8
+ MOVQ AX, R15
+ RORXQ $0x29, R11, R13
+ RORXQ $0x12, R11, R14
+ XORQ BX, R15
+ XORQ R14, R13
+ RORXQ $0x0e, R11, R14
+ ANDQ R11, R15
+ ADDQ DI, R8
+ XORQ R14, R13
+ RORXQ $0x22, R8, R12
+ XORQ BX, R15
+ RORXQ $0x27, R8, R14
+ MOVQ R8, DI
+ XORQ R12, R14
+ RORXQ $0x1c, R8, R12
+ ADDQ 8(SP), CX
+ ORQ R9, DI
+ XORQ R12, R14
+ MOVQ R8, R12
+ ANDQ DX, DI
+ ANDQ R9, R12
+ ADDQ R13, R15
+ ADDQ CX, R10
+ ORQ R12, DI
+ ADDQ R14, CX
+ ADDQ R15, R10
+ ADDQ R15, CX
+ MOVQ R11, R15
+ RORXQ $0x29, R10, R13
+ RORXQ $0x12, R10, R14
+ XORQ AX, R15
+ XORQ R14, R13
+ RORXQ $0x0e, R10, R14
+ ANDQ R10, R15
+ ADDQ DI, CX
+ XORQ R14, R13
+ RORXQ $0x22, CX, R12
+ XORQ AX, R15
+ RORXQ $0x27, CX, R14
+ MOVQ CX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, CX, R12
+ ADDQ 16(SP), BX
+ ORQ DX, DI
+ XORQ R12, R14
+ MOVQ CX, R12
+ ANDQ R8, DI
+ ANDQ DX, R12
+ ADDQ R13, R15
+ ADDQ BX, R9
+ ORQ R12, DI
+ ADDQ R14, BX
+ ADDQ R15, R9
+ ADDQ R15, BX
+ MOVQ R10, R15
+ RORXQ $0x29, R9, R13
+ RORXQ $0x12, R9, R14
+ XORQ R11, R15
+ XORQ R14, R13
+ RORXQ $0x0e, R9, R14
+ ANDQ R9, R15
+ ADDQ DI, BX
+ XORQ R14, R13
+ RORXQ $0x22, BX, R12
+ XORQ R11, R15
+ RORXQ $0x27, BX, R14
+ MOVQ BX, DI
+ XORQ R12, R14
+ RORXQ $0x1c, BX, R12
+ ADDQ 24(SP), AX
+ ORQ R8, DI
+ XORQ R12, R14
+ MOVQ BX, R12
+ ANDQ CX, DI
+ ANDQ R8, R12
+ ADDQ R13, R15
+ ADDQ AX, DX
+ ORQ R12, DI
+ ADDQ R14, AX
+ ADDQ R15, DX
+ ADDQ R15, AX
+ ADDQ DI, AX
VMOVDQU Y6, Y4
VMOVDQU Y7, Y5
-
- SUBQ $1, frame_SRND(SP)
- JNE loop2
-
- addm(8*0(SI),AX)
- addm(8*1(SI),BX)
- addm(8*2(SI),CX)
- addm(8*3(SI),R8)
- addm(8*4(SI),DX)
- addm(8*5(SI),R9)
- addm(8*6(SI),R10)
- addm(8*7(SI),R11)
-
- MOVQ frame_INP(SP), DI
- ADDQ $128, DI
- CMPQ DI, frame_INPEND(SP)
- JNE loop0
+ SUBQ $0x01, 32(SP)
+ JNE loop2
+ ADDQ (SI), AX
+ MOVQ AX, (SI)
+ ADDQ 8(SI), BX
+ MOVQ BX, 8(SI)
+ ADDQ 16(SI), CX
+ MOVQ CX, 16(SI)
+ ADDQ 24(SI), R8
+ MOVQ R8, 24(SI)
+ ADDQ 32(SI), DX
+ MOVQ DX, 32(SI)
+ ADDQ 40(SI), R9
+ MOVQ R9, 40(SI)
+ ADDQ 48(SI), R10
+ MOVQ R10, 48(SI)
+ ADDQ 56(SI), R11
+ MOVQ R11, 56(SI)
+ MOVQ 40(SP), DI
+ ADDQ $0x80, DI
+ CMPQ DI, 48(SP)
+ JNE loop0
done_hash:
VZEROUPPER
RET
+
+DATA MASK_YMM_LO<>+0(SB)/8, $0x0000000000000000 // MASK_YMM_LO, bytes 0-7: zero
+DATA MASK_YMM_LO<>+8(SB)/8, $0x0000000000000000 // bytes 8-15: zero
+DATA MASK_YMM_LO<>+16(SB)/8, $0xffffffffffffffff // bytes 16-23: all ones
+DATA MASK_YMM_LO<>+24(SB)/8, $0xffffffffffffffff // bytes 24-31: all ones
+GLOBL MASK_YMM_LO<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free table; VPAND with it clears the low 128 bits of a YMM register and keeps the high 128 bits