// for small values of z_len (0.90x in the worst case), but
// gain significant performance as z_len increases (up to
// 1.45x).
+
+ PCALIGN $32
loop:
MOVD 8(R8), R11 // R11 = x[i]
MOVD 16(R8), R12 // R12 = x[i+1]
// for small values of z_len (0.92x in the worst case), but
// gain significant performance as z_len increases (up to
// 1.45x).
+
+ PCALIGN $32
loop:
MOVD 8(R8), R11 // R11 = x[i]
MOVD 16(R8), R12 // R12 = x[i+1]
CMP R0, R9
MOVD R9, CTR // Set up the loop counter
BEQ tail // If R9 = 0, we can't use the loop
+ PCALIGN $32
loop:
MOVD 8(R8), R20 // R20 = x[i]
// The loop here is almost the same as the one used in s390x, but
// we don't need to capture CA every iteration because we've already
// done that above.
+
+ PCALIGN $32
loop:
MOVD 8(R8), R20
MOVD 16(R8), R21
CMP R5, R0 // iterate from i=len(z)-1 to 0
BEQ loopexit // Already at end?
MOVD 0(R15),R10 // x[i]
+ PCALIGN $32
shloop:
SLD R9, R10, R10 // x[i]<<s
MOVDU -8(R15), R14
CMP R0, R14
MOVD R14, CTR // Set up the loop counter
BEQ tail // If R14 = 0, we can't use the loop
+ PCALIGN $32
loop:
MOVD 8(R8), R20 // R20 = x[i]
MOVD R0, R4 // R4 = c = 0
MOVD R22, CTR // Initialize loop counter
BEQ done
+ PCALIGN $32
loop:
MOVD (R8)(R3), R20 // Load x[i]