RET
TEXT ·addMulVVW(SB),NOSPLIT,$0
- JMP ·addMulVVW_g(SB)
+ MOV x+24(FP), X5 // X5 = pointer to the current x limb
+ MOV y+48(FP), X6 // X6 = y, the multiplier applied to every limb
+ MOV z+0(FP), X7 // X7 = pointer to the current z limb (read and written in place)
+ MOV z_len+8(FP), X30 // X30 = limbs left to process; only z's length is read, so x is assumed to be at least as long
+ // Computes z[i] += x[i]*y over z_len 8-byte limbs, chaining the carry word in X29 and returning it as c.
+ MOV $4, X28 // X28 = 4, the unroll factor that X30 is compared against
+ MOV $0, X29 // c = 0 (running carry word)
+
+ BEQZ X30, done // zero length: nothing to do, returns c = 0
+ BLTU X30, X28, loop1 // fewer than 4 limbs: skip the unrolled loop
+ // Unrolled body: four limbs per iteration; all eight loads are issued before any store.
+loop4:
+ MOV 0(X5), X8 // x[0]
+ MOV 0(X7), X10 // z[0]
+ MOV 8(X5), X11 // x[1]
+ MOV 8(X7), X13 // z[1]
+ MOV 16(X5), X14 // x[2]
+ MOV 16(X7), X16 // z[2]
+ MOV 24(X5), X17 // x[3]
+ MOV 24(X7), X19 // z[3]
+
+ MULHU X8, X6, X9 // z_hi[0] = high 64 bits of x[0] * y (unsigned)
+ MUL X8, X6, X8 // z_lo[0] = low 64 bits of x[0] * y
+ ADD X8, X10, X21 // z_lo[0] = x[0] * y + z[0] (may wrap)
+ SLTU X8, X21, X22 // X22 = 1 if the add above wrapped (sum < addend), else 0
+ ADD X9, X22, X9 // z_hi[0] = x[0] * y + z[0], carry folded into the high word
+ ADD X21, X29, X10 // z[0] = x[0] * y + z[0] + c (may wrap)
+ SLTU X21, X10, X22 // X22 = 1 if adding c wrapped, else 0
+ ADD X9, X22, X29 // next c; cannot overflow because x*y + z + c <= 2^128 - 1
+
+ MULHU X11, X6, X12 // z_hi[1] = high 64 bits of x[1] * y (unsigned)
+ MUL X11, X6, X11 // z_lo[1] = low 64 bits of x[1] * y
+ ADD X11, X13, X21 // z_lo[1] = x[1] * y + z[1] (may wrap)
+ SLTU X11, X21, X22 // X22 = carry out of the add above
+ ADD X12, X22, X12 // z_hi[1] = x[1] * y + z[1], carry folded into the high word
+ ADD X21, X29, X13 // z[1] = x[1] * y + z[1] + c (c comes from limb 0)
+ SLTU X21, X13, X22 // X22 = carry out of adding c
+ ADD X12, X22, X29 // next c
+
+ MULHU X14, X6, X15 // z_hi[2] = high 64 bits of x[2] * y (unsigned)
+ MUL X14, X6, X14 // z_lo[2] = low 64 bits of x[2] * y
+ ADD X14, X16, X21 // z_lo[2] = x[2] * y + z[2] (may wrap)
+ SLTU X14, X21, X22 // X22 = carry out of the add above
+ ADD X15, X22, X15 // z_hi[2] = x[2] * y + z[2], carry folded into the high word
+ ADD X21, X29, X16 // z[2] = x[2] * y + z[2] + c (c comes from limb 1)
+ SLTU X21, X16, X22 // X22 = carry out of adding c
+ ADD X15, X22, X29 // next c
+
+ MULHU X17, X6, X18 // z_hi[3] = high 64 bits of x[3] * y (unsigned)
+ MUL X17, X6, X17 // z_lo[3] = low 64 bits of x[3] * y
+ ADD X17, X19, X21 // z_lo[3] = x[3] * y + z[3] (may wrap)
+ SLTU X17, X21, X22 // X22 = carry out of the add above
+ ADD X18, X22, X18 // z_hi[3] = x[3] * y + z[3], carry folded into the high word
+ ADD X21, X29, X19 // z[3] = x[3] * y + z[3] + c (c comes from limb 2)
+ SLTU X21, X19, X22 // X22 = carry out of adding c
+ ADD X18, X22, X29 // next c, carried into the next iteration or returned
+ MOV X10, 0(X7) // store z[0]
+ MOV X13, 8(X7) // store z[1]
+ MOV X16, 16(X7) // store z[2]
+ MOV X19, 24(X7) // store z[3]
+
+ ADD $32, X5 // advance x by 4 limbs (4 * 8 bytes)
+ ADD $32, X7 // advance z by 4 limbs
+ SUB $4, X30 // 4 fewer limbs remaining
+
+ BGEU X30, X28, loop4 // at least 4 limbs left: run the unrolled body again
+ BEQZ X30, done // length was a multiple of 4: no tail to process
+ // Tail: one limb per iteration, for the 1-3 leftover limbs or for inputs shorter than 4 limbs.
+loop1:
+ MOV 0(X5), X10 // x
+ MOV 0(X7), X11 // z
+
+ MULHU X10, X6, X12 // z_hi = high 64 bits of x * y (unsigned)
+ MUL X10, X6, X10 // z_lo = low 64 bits of x * y
+ ADD X10, X11, X13 // z_lo = x * y + z (may wrap)
+ SLTU X10, X13, X15 // X15 = 1 if the add above wrapped, else 0
+ ADD X12, X15, X12 // z_hi = x * y + z, carry folded into the high word
+ ADD X13, X29, X10 // z = x * y + z + c (may wrap)
+ SLTU X13, X10, X15 // X15 = 1 if adding c wrapped, else 0
+ ADD X12, X15, X29 // next c
+
+ MOV X10, 0(X7) // store the updated z limb
+
+ ADD $8, X5 // advance x by one limb
+ ADD $8, X7 // advance z by one limb
+ SUB $1, X30 // one fewer limb remaining
+
+ BNEZ X30, loop1 // repeat until no limbs remain
+ // X29 holds the carry out of the last limb processed (0 if z_len was 0).
+done:
+ MOV X29, c+56(FP) // return c
+ RET