/*
 * Patch excerpt: path taken when the BNE to _bunaligned falls through.
 * Only the lines surrounding the changed instructions appear here; the
 * copy instructions inside the loops are not part of this excerpt.
 * NOTE(review): the _b prefix presumably marks the backward-copy path -- confirm.
 */
BNE _bunaligned
/* TMP = TS + 31; used as the bound tested at the top of _b32loop. */
ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */
/*
 * Spill TS to the stack slot named savedts; it is reloaded at _b4tail.
 * The patch moves the slot from savedts+4(SP) to savedts-4(SP).
 * NOTE(review): if this is Go assembler syntax, name-4(SP) addresses a
 * frame-local slot relative to the pseudo-SP -- confirm against the
 * function's TEXT declaration, which is outside this excerpt.
 */
- MOVW R(TS), savedts+4(SP)
+ MOVW R(TS), savedts-4(SP)
_b32loop:
/* Exit to _b4tail on the unsigned "lower or same" result of comparing TE with TMP. */
CMP R(TMP), R(TE)
BLS _b4tail
/* Back to the loop top (the chunk copy that precedes this branch is not shown). */
B _b32loop
_b4tail: /* do remaining words if possible */
/* Reload the TS value spilled before _b32loop; must use the same slot as the store. */
- MOVW savedts+4(SP), R(TS)
+ MOVW savedts-4(SP), R(TS)
/* TMP = TS + 3; bound for the word-at-a-time loop below. */
ADD $3, R(TS), R(TMP)
_b4loop:
/* Same TE-versus-TMP test as _b32loop; the rest of this loop is outside the excerpt. */
CMP R(TMP), R(TE)
/*
 * Patch excerpt: path taken when the BNE to _funaligned falls through.
 * Only the lines surrounding the changed instructions appear here; the
 * copy instructions inside the loops are not part of this excerpt.
 * NOTE(review): the _f prefix presumably marks the forward-copy path -- confirm.
 */
BNE _funaligned
/* TMP = TE - 31; used as the bound tested at the top of _f32loop. */
SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */
/*
 * Spill TE to the stack slot named savedte; it is reloaded at _f4tail.
 * The patch moves the slot from savedte+4(SP) to savedte-4(SP).
 * NOTE(review): if this is Go assembler syntax, name-4(SP) addresses a
 * frame-local slot relative to the pseudo-SP -- confirm against the
 * function's TEXT declaration, which is outside this excerpt.
 */
- MOVW R(TE), savedte+4(SP)
+ MOVW R(TE), savedte-4(SP)
_f32loop:
/* Exit to _f4tail on the unsigned "higher or same" result of comparing TS with TMP. */
CMP R(TMP), R(TS)
BHS _f4tail
/* Back to the loop top (the chunk copy that precedes this branch is not shown). */
B _f32loop
_f4tail:
/* Reload the TE value spilled before _f32loop; must use the same slot as the store. */
- MOVW savedte+4(SP), R(TE)
+ MOVW savedte-4(SP), R(TE)
/* TMP = TE - 3; bound for the word-at-a-time loop below. */
SUB $3, R(TE), R(TMP) /* do remaining words if possible */
_f4loop:
/* Same TS-versus-TMP test as _f32loop; the rest of this loop is outside the excerpt. */
CMP R(TMP), R(TS)
/*
 * Patch excerpt: setup and exit of the _bu16loop path. The compare that
 * feeds the BLS below and the body of _bu16loop are outside this excerpt.
 * NOTE(review): presumably the unaligned-source variant of the _b path -- confirm.
 */
BLS _b1tail
/* Clear the low two bits of FROM so the loads that follow are word-aligned. */
BIC $3, R(FROM) /* align source */
/*
 * Spill TS to the stack slot named savedts; it is reloaded at _bu1tail.
 * The patch moves the slot from savedts+4(SP) to savedts-4(SP).
 * NOTE(review): if this is Go assembler syntax, name-4(SP) addresses a
 * frame-local slot relative to the pseudo-SP -- confirm against the
 * function's TEXT declaration, which is outside this excerpt.
 */
- MOVW R(TS), savedts+4(SP)
+ MOVW R(TS), savedts-4(SP)
/* Load the word at the aligned FROM address into BR0 before entering the loop. */
MOVW (R(FROM)), R(BR0) /* prime first block register */
_bu16loop:
/* Only the label and the back-branch are shown; the loop body and its exit test are elided. */
B _bu16loop
_bu1tail:
/* Reload the TS value spilled above; must use the same slot as the store. */
- MOVW savedts+4(SP), R(TS)
+ MOVW savedts-4(SP), R(TS)
/* FROM += OFFSET, then continue at _b1tail. */
ADD R(OFFSET), R(FROM)
B _b1tail
/*
 * Patch excerpt: setup and exit of the _fu16loop path. The compare that
 * feeds the BHS below and the body of _fu16loop are outside this excerpt.
 * NOTE(review): presumably the unaligned-source variant of the _f path -- confirm.
 */
BHS _f1tail
/* Clear the low two bits of FROM so the loads that follow are word-aligned. */
BIC $3, R(FROM) /* align source */
/*
 * Spill TE to the stack slot named savedte; it is reloaded at _fu1tail.
 * The patch moves the slot from savedte+4(SP) to savedte-4(SP).
 * NOTE(review): if this is Go assembler syntax, name-4(SP) addresses a
 * frame-local slot relative to the pseudo-SP -- confirm against the
 * function's TEXT declaration, which is outside this excerpt.
 */
- MOVW R(TE), savedte+4(SP)
+ MOVW R(TE), savedte-4(SP)
/* Load one word from FROM into FR3; the .P form also updates FROM (see trailing comment). */
MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */
_fu16loop:
/* Only the label and the back-branch are shown; the loop body and its exit test are elided. */
B _fu16loop
_fu1tail:
/* Reload the TE value spilled above; must use the same slot as the store. */
- MOVW savedte+4(SP), R(TE)
+ MOVW savedte-4(SP), R(TE)
/* FROM -= OFFSET, then continue at _f1tail. */
SUB R(OFFSET), R(FROM)
B _f1tail