#include "go_asm.h"
#include "textflag.h"
// ·Equal(a, b []byte) bool
// Reports whether the two byte slices have the same length and the same
// contents. Frame layout used below: a_base+0, a_len+8, b_base+24, b_len+32,
// and the one-byte result at ret+48.
-TEXT ·Equal(SB),NOSPLIT,$0-49
+TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49
MOVD a_len+8(FP), R4
MOVD b_len+32(FP), R5
CMP R5, R4 // unequal lengths are not equal
BNE noteq
// Lengths match: R5 already holds the common length. Load the two data
// pointers into R3/R4, the registers memeqbody compares.
MOVD a_base+0(FP), R3
MOVD b_base+24(FP), R4
- BL memeqbody<>(SB)
-
- MOVBZ R9,ret+48(FP)
- RET
// R10 = address of the result byte. memeqbody stores the answer through R10
// and executes RET itself, so it is entered with a plain branch (tail call)
// and returns directly to our caller.
+ MOVD $ret+48(FP), R10
+ BR memeqbody<>(SB)
noteq:
// Length mismatch: the result is false. This must be the only store to ret on
// this path; R3 has not been loaded here, so it must not be written to ret.
MOVBZ $0,ret+48(FP)
RET
// bytes·Equal(a, b []byte) bool
// Same contract and frame layout as ·Equal: a_base+0, a_len+8, b_base+24,
// b_len+32, one-byte result at ret+48. The FUNCDATA line reuses ·Equal's
// argument stack map.
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
+TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
FUNCDATA $0, ·Equal·args_stackmap(SB)
MOVD a_len+8(FP), R4
MOVD b_len+32(FP), R5
// The branch below tests CR0, so the lengths must be compared first;
// without this compare the branch would act on stale condition bits.
CMP R5, R4 // unequal lengths are not equal
BNE noteq
// Lengths match: R5 holds the common length. Load the two data pointers
// into R3/R4, the registers memeqbody compares.
MOVD a_base+0(FP), R3
MOVD b_base+24(FP), R4
- BL memeqbody<>(SB)
-
- MOVBZ R9,ret+48(FP)
- RET
// R10 = address of the result byte. memeqbody stores the answer through R10
// and executes RET itself, so it is entered with a plain branch (tail call).
+ MOVD $ret+48(FP), R10
+ BR memeqbody<>(SB)
noteq:
// Length mismatch: the result is false.
MOVBZ $0,ret+48(FP)
RET
// memequal(a, b unsafe.Pointer, size uintptr) bool
// Compares size bytes at a and b. The arguments are loaded into the
// registers memeqbody compares (R3 = a, R4 = b, R5 = size) and the comparison
// itself is done by memeqbody. The one-byte result lives at ret+24(FP).
-TEXT runtime·memequal(SB),NOSPLIT,$0-25
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
MOVD a+0(FP), R3
MOVD b+8(FP), R4
MOVD size+16(FP), R5
// New version: pass the address of the result byte in R10 and branch (not
// call) to memeqbody, which stores the result through R10 and returns
// directly to our caller.
+ MOVD $ret+24(FP), R10
// Previous version: call memeqbody and copy its R9 result into ret.
- BL memeqbody<>(SB)
- MOVB R9, ret+24(FP)
- RET
+ BR memeqbody<>(SB)
// memequal_varlen(a, b unsafe.Pointer) bool
// Compares two memory regions whose size is not an argument: the size is read
// from offset 8 of the closure addressed by R11. Identical pointers are
// reported equal without touching memory. The one-byte result lives at
// ret+16(FP).
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
MOVD a+0(FP), R3
MOVD b+8(FP), R4
CMP R3, R4
BEQ eq // same address: trivially equal
MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure
- BL memeqbody<>(SB)
- MOVB R9, ret+16(FP)
- RET
// R10 = address of the result byte. memeqbody stores the answer through R10
// and executes RET itself, so it is entered with a plain branch (tail call).
+ MOVD $ret+16(FP), R10
+ BR memeqbody<>(SB)
eq:
MOVD $1, R3
MOVB R3, ret+16(FP)
// Return explicitly; without this the fast path would fall off the end of
// the function into whatever code follows it.
RET
// memeqbody compares len bytes at s1 and s2. In the new version the boolean
// result is stored through the pointer in R10 rather than returned in R9, so
// callers can enter this routine with a branch and let its RET return to
// their own caller.
//
// NOTE(review): this view of the routine looks incomplete. The label
// "leftover" is referenced but not defined in the visible lines, and some
// compares and branches have no visible partner. Confirm against the full
// file before relying on the comments below.
//
// Registers on entry:
// R3 = s1
// R4 = s2
// R5 = len
-// R9 = return value
+// R10 = addr of return value (byte)
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R5,CTR
// NOTE(review): no branch consuming this compare is visible in this excerpt.
CMP R5,$8 // only optimize >=8
setup32a: // 8 byte aligned, >= 32 bytes
SRADCC $5,R5,R6 // number of 32 byte chunks to compare
MOVD R6,CTR
+ MOVD $16,R14 // index for VSX loads and stores
loop32a:
// Previous version: four doubleword compares per 32-byte iteration.
- MOVD 0(R3),R6 // doublewords to compare
- MOVD 0(R4),R7
- MOVD 8(R3),R8 //
- MOVD 8(R4),R9
- CMP R6,R7 // bytes batch?
- BNE noteq
- MOVD 16(R3),R6
- MOVD 16(R4),R7
- CMP R8,R9 // bytes match?
- MOVD 24(R3),R8
- MOVD 24(R4),R9
- BNE noteq
- CMP R6,R7 // bytes match?
- BNE noteq
// New version: two 16-byte vector compares per 32-byte iteration. The first
// pair of loads is indexed by R0 (assumed to hold zero here — TODO confirm),
// the second by R14 (= 16). VCMPEQUBCC records its outcome in CR6, and BGE
// leaves the loop unless CR6 reports that every byte compared equal.
+ LXVD2X (R3+R0), VS32 // VS32 = V0
+ LXVD2X (R4+R0), VS33 // VS33 = V1
+ VCMPEQUBCC V0, V1, V2 // compare, setting CR6
+ BGE CR6, noteq
+ LXVD2X (R3+R14), VS32
+ LXVD2X (R4+R14), VS33
+ VCMPEQUBCC V0, V1, V2
+ BGE CR6, noteq
ADD $32,R3 // bump up to next 32
ADD $32,R4
- CMP R8,R9 // bytes match?
- BC 8,2,loop32a // br ctr and cr
- BNE noteq
+ BC 16, 0, loop32a // decrement CTR; loop while CTR != 0
// R6 = len & 24: bytes left over in whole 8-byte chunks after the 32-byte loop.
ANDCC $24,R5,R6 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
// NOTE(review): the 8-byte compare loop that presumably belongs here is not
// visible; as shown, this branch tests the condition left by the ANDCC above.
BNE noteq
BR equal
noteq:
// Mismatch: store false through the result pointer.
- MOVD $0, R9
+ MOVB $0, (R10)
RET
equal:
// Match: store true through the result pointer (R3 is reused as scratch).
- MOVD $1, R9
+ MOVD $1, R3
+ MOVB R3, (R10)
RET