// Equal stores 1 in the byte at ret+24(FP) when a and b have equal length and
// equal contents, and 0 otherwise.
// NOTE(review): the offsets used below (a_base+0, a_len+4, b_base+12, b_len+16,
// ret+24) look like two slice arguments and a bool result on a 32-bit target —
// confirm against the Go declaration.
TEXT ·Equal(SB),NOSPLIT,$0-25
MOVW a_len+4(FP), R1 // R1 = length of a
MOVW b_len+16(FP), R3 // R3 = length of b
-
CMP R1, R3 // unequal lengths are not equal
B.NE notequal
+ CMP $0, R1 // short path to handle 0-byte case
+ B.EQ equal
MOVW a_base+0(FP), R0 // R0 = data pointer of a
MOVW b_base+12(FP), R2 // R2 = data pointer of b
MOVW $ret+24(FP), R7 // R7 = address of the result byte; memeqbody stores through it
B memeqbody<>(SB) // branch, not a call: memeqbody writes the result and executes RET itself
+equal:
+ MOVW $1, R0
+ MOVB R0, ret+24(FP) // result = 1 (equal)
+ RET
notequal:
MOVW $0, R0
MOVBU R0, ret+24(FP) // result = 0 (not equal)
CMP R0, R2
B.EQ eq
MOVW size+8(FP), R1
+ CMP $0, R1
+ B.EQ eq // short path to handle 0-byte case
MOVW $ret+12(FP), R7
B memeqbody<>(SB)
eq:
MOVW b+4(FP), R2
CMP R0, R2
B.EQ eq
- MOVW 4(R7), R1 // compiler stores size at offset 4 in the closure
+ MOVW 4(R7), R1 // compiler stores size at offset 4 in the closure
+ CMP $0, R1
+ B.EQ eq // short path to handle 0-byte case
MOVW $ret+8(FP), R7
B memeqbody<>(SB)
eq:
// R1: length
// R2: data of b
// R7: points to return value
+//
+// On exit:
+// R4, R5 and R6 are clobbered
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
// Compares the R1 bytes starting at R0 with the R1 bytes starting at R2 and
// stores 1 (equal) or 0 (different) in the byte R7 points to, then returns.
// R0, R1 and R2 are modified along the way (the pointers advance, R1 becomes
// the end address, and R0 finally holds the result value).
// The byte loop compares before it tests for the end, so a length of 0 is not
// handled here; the entry points visible in this file branch away on length 0
// before reaching this code.
- ADD R0, R1 // end
-loop:
+ CMP $1, R1
+ B.EQ one // 1-byte special case for better performance
+
+ CMP $4, R1 // flags from this compare are tested by the B.LT after the ADD
+ ADD R0, R1 // R1 is the end of the range to compare
+ B.LT byte_loop // length < 4
+ AND $3, R0, R6 // R6 = low two bits of a's address
+ CMP $0, R6
+ B.NE byte_loop // unaligned a, use byte-wise compare (TODO: try to align a)
+ AND $3, R2, R6 // R6 = low two bits of b's address
+ CMP $0, R6
+ B.NE byte_loop // unaligned b, use byte-wise compare
+ AND $0xfffffffc, R1, R6 // R6 = end address rounded down to a multiple of 4: where the word loop stops
+ // length >= 4
+chunk4_loop:
+ MOVW.P 4(R0), R4 // load one word of a, then advance R0 by 4
+ MOVW.P 4(R2), R5 // load one word of b, then advance R2 by 4
+ CMP R4, R5
+ B.NE notequal
+ CMP R0, R6 // R0 is word-aligned here, so it lands exactly on R6
+ B.NE chunk4_loop
CMP R0, R1 // any tail bytes left after the last whole word?
B.EQ equal // reached the end
+byte_loop:
MOVBU.P 1(R0), R4 // load one byte of a, then advance R0 by 1
MOVBU.P 1(R2), R5 // load one byte of b, then advance R2 by 1
CMP R4, R5
- B.EQ loop
-notequal:
- MOVW $0, R0
- MOVB R0, (R7)
- RET
+ B.NE notequal
+ CMP R0, R1 // stop once R0 reaches the end address
+ B.NE byte_loop
equal:
MOVW $1, R0
MOVB R0, (R7) // result = 1 (equal)
RET
+one:
+ MOVBU (R0), R4 // single-byte case: no pointer update needed
+ MOVBU (R2), R5
+ CMP R4, R5
+ B.EQ equal // otherwise fall through to notequal
+notequal:
+ MOVW $0, R0
+ MOVB R0, (R7) // result = 0 (not equal)
+ RET