MOVB R3, ret+16(FP)
RET
-// Do an efficieint memequal for ppc64
-// for reuse where possible.
+// Do an efficient memcmp for ppc64le
+// R3 = s1 len
+// R4 = s2 len
+// R5 = s1 addr
+// R6 = s2 addr
+// R7 = addr of return value
+TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R3,R8 // set up length
+ CMP R3,R4,CR2 // unequal?
+ BC 12,8,setuplen // BLT CR2
+ MOVD R4,R8 // use R4 for comparison len
+setuplen:
+ MOVD R8,CTR // set up loop counter
+ CMP R8,$8 // only optimize >=8
+ BLT simplecheck
+ DCBT (R5) // cache hint
+ DCBT (R6)
+ CMP R8,$32 // optimize >= 32
+ MOVD R8,R9
+ BLT setup8a // 8 byte moves only
+setup32a:
+ SRADCC $5,R8,R9 // number of 32 byte chunks
+ MOVD R9,CTR
+
+ // Special processing for 32 bytes or longer.
+ // Loading this way is faster and correct as long as the
+ // doublewords being compared are equal. Once they
+ // are found unequal, reload them in proper byte order
+ // to determine greater or less than.
+loop32a:
+ MOVD 0(R5),R9 // doublewords to compare
+ MOVD 0(R6),R10 // get 4 doublewords
+ MOVD 8(R5),R14
+ MOVD 8(R6),R15
+ CMPU R9,R10 // bytes equal?
+ MOVD $0,R16 // set up for cmpne
+ BNE cmpne // further compare for LT or GT
+ MOVD 16(R5),R9 // get next pair of doublewords
+ MOVD 16(R6),R10
+ CMPU R4,R15 // bytes match?
+ MOVD $8,R16 // set up for cmpne
+ BNE cmpne // further compare for LT or GT
+ MOVD 24(R5),R14 // get next pair of doublewords
+ MOVD 24(R6),R15
+ CMPU R9,R10 // bytes match?
+ MOVD $16,R16 // set up for cmpne
+ BNE cmpne // further compare for LT or GT
+ MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32
+ ADD $32,R5 // bump up to next 32
+ ADD $32,R6
+ CMPU R14,R15 // bytes match?
+ BC 8,2,loop32a // br ctr and cr
+ BNE cmpne
+ ANDCC $24,R8,R9 // Any 8 byte chunks?
+ BEQ leftover // and result is 0
+setup8a:
+ SRADCC $3,R9,R9 // get the 8 byte count
+ BEQ leftover // shifted value is 0
+ MOVD R9,CTR // loop count for doublewords
+loop8:
+ MOVDBR (R5+R0),R9 // doublewords to compare
+ MOVDBR (R6+R0),R10 // LE compare order
+ ADD $8,R5
+ ADD $8,R6
+ CMPU R9,R10 // match?
+ BC 8,2,loop8 // bt ctr <> 0 && cr
+ BGT greater
+ BLT less
+leftover:
+ ANDCC $7,R8,R9 // check for leftover bytes
+ MOVD R9,CTR // save the ctr
+ BNE simple // leftover bytes
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,8,less
+ BR greater
+simplecheck:
+ CMP R8,$0 // remaining compare length 0
+ BNE simple // do simple compare
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,8,less // 1st len < 2nd len, result less
+ BR greater // 1st len > 2nd len must be greater
+simple:
+ MOVBZ 0(R5), R9 // get byte from 1st operand
+ ADD $1,R5
+ MOVBZ 0(R6), R10 // get byte from 2nd operand
+ ADD $1,R6
+ CMPU R9, R10
+ BC 8,2,simple // bc ctr <> 0 && cr
+ BGT greater // 1st > 2nd
+ BLT less // 1st < 2nd
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,9,greater // 2nd len > 1st len
+ BR less // must be less
+cmpne: // only here is not equal
+ MOVDBR (R5+R16),R8 // reload in reverse order
+ MOVDBR (R6+R16),R9
+ CMPU R8,R9 // compare correct endianness
+ BGT greater // here only if NE
+less:
+ MOVD $-1,R3
+ MOVD R3,(R7) // return value if A < B
+ RET
+equal:
+ MOVD $0,(R7) // return value if A == B
+ RET
+greater:
+ MOVD $1,R3
+ MOVD R3,(R7) // return value if A > B
+ RET
+
+// Do an efficient memcmp for ppc64 (BE)
+// R3 = s1 len
+// R4 = s2 len
+// R5 = s1 addr
+// R6 = s2 addr
+// R7 = addr of return value
+TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R3,R8 // set up length
+ CMP R3,R4,CR2 // unequal?
+ BC 12,8,setuplen // BLT CR2
+ MOVD R4,R8 // use R4 for comparison len
+setuplen:
+ MOVD R8,CTR // set up loop counter
+ CMP R8,$8 // only optimize >=8
+ BLT simplecheck
+ DCBT (R5) // cache hint
+ DCBT (R6)
+ CMP R8,$32 // optimize >= 32
+ MOVD R8,R9
+ BLT setup8a // 8 byte moves only
+
+setup32a:
+ SRADCC $5,R8,R9 // number of 32 byte chunks
+ MOVD R9,CTR
+loop32a:
+ MOVD 0(R5),R9 // doublewords to compare
+ MOVD 0(R6),R10 // get 4 doublewords
+ MOVD 8(R5),R14
+ MOVD 8(R6),R15
+ CMPU R9,R10 // bytes equal?
+ BLT less // found to be less
+ BGT greater // found to be greater
+ MOVD 16(R5),R9 // get next pair of doublewords
+ MOVD 16(R6),R10
+ CMPU R14,R15 // bytes match?
+ BLT less // found less
+ BGT greater // found greater
+ MOVD 24(R5),R14 // get next pair of doublewords
+ MOVD 24(R6),R15
+ CMPU R9,R10 // bytes match?
+ BLT less // found to be less
+ BGT greater // found to be greater
+ ADD $32,R5 // bump up to next 32
+ ADD $32,R6
+ CMPU R14,R15 // bytes match?
+ BC 8,2,loop32a // br ctr and cr
+ BLT less // with BE, byte ordering is
+ BGT greater // good for compare
+ ANDCC $24,R8,R9 // Any 8 byte chunks?
+ BEQ leftover // and result is 0
+setup8a:
+ SRADCC $3,R9,R9 // get the 8 byte count
+ BEQ leftover // shifted value is 0
+ MOVD R9,CTR // loop count for doublewords
+loop8:
+ MOVD (R5),R9
+ MOVD (R6),R10
+ ADD $8,R5
+ ADD $8,R6
+ CMPU R9,R10 // match?
+ BC 8,2,loop8 // bt ctr <> 0 && cr
+ BGT greater
+ BLT less
+leftover:
+ ANDCC $7,R8,R9 // check for leftover bytes
+ MOVD R9,CTR // save the ctr
+ BNE simple // leftover bytes
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,8,less
+ BR greater
+simplecheck:
+ CMP R8,$0 // remaining compare length 0
+ BNE simple // do simple compare
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,8,less // 1st len < 2nd len, result less
+ BR greater // same len, must be equal
+simple:
+ MOVBZ 0(R5),R9 // get byte from 1st operand
+ ADD $1,R5
+ MOVBZ 0(R6),R10 // get byte from 2nd operand
+ ADD $1,R6
+ CMPU R9,R10
+ BC 8,2,simple // bc ctr <> 0 && cr
+ BGT greater // 1st > 2nd
+ BLT less // 1st < 2nd
+ BC 12,10,equal // test CR2 for length comparison
+ BC 12,9,greater // 2nd len > 1st len
+less:
+ MOVD $-1,R3
+ MOVD R3,(R7) // return value if A < B
+ RET
+equal:
+ MOVD $0,(R7) // return value if A == B
+ RET
+greater:
+ MOVD $1,R3
+ MOVD R3,(R7) // return value if A > B
+ RET
+
+// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// R9 = return value
-// R6, R7 clobbered
TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
MOVD R5,CTR
CMP R5,$8 // only optimize >=8
MOVD s2_base+16(FP), R6
MOVD s2_len+24(FP), R4
MOVD $ret+32(FP), R7
- BR runtime·cmpbody<>(SB)
+#ifdef GOARCH_ppc64le
+ BR cmpbodyLE<>(SB)
+#else
+ BR cmpbodyBE<>(SB)
+#endif
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD s1+0(FP), R5
MOVD s2+24(FP), R6
MOVD s2+32(FP), R4
MOVD $ret+48(FP), R7
- BR runtime·cmpbody<>(SB)
-
-// On entry:
-// R3 is the length of s1
-// R4 is the length of s2
-// R5 points to the start of s1
-// R6 points to the start of s2
-// R7 points to return value (-1/0/1 will be written here)
-//
-// On exit:
-// R5, R6, R8, R9 and R10 are clobbered
-TEXT runtime·cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
- CMP R5, R6
- BEQ samebytes // same starting pointers; compare lengths
- SUB $1, R5
- SUB $1, R6
- MOVD R4, R8
- CMP R3, R4
- BGE 2(PC)
- MOVD R3, R8 // R8 is min(R3, R4)
- ADD R5, R8 // R5 is current byte in s1, R8 is last byte in s1 to compare
-loop:
- CMP R5, R8
- BEQ samebytes // all compared bytes were the same; compare lengths
- MOVBZU 1(R5), R9
- MOVBZU 1(R6), R10
- CMP R9, R10
- BEQ loop
- // bytes differed
- MOVD $1, R4
- BGT 2(PC)
- NEG R4
- MOVD R4, (R7)
- RET
-samebytes:
- MOVD $1, R8
- CMP R3, R4
- BNE 3(PC)
- MOVD R0, (R7)
- RET
- BGT 2(PC)
- NEG R8
- MOVD R8, (R7)
- RET
+#ifdef GOARCH_ppc64le
+ BR cmpbodyLE<>(SB)
+#else
+ BR cmpbodyBE<>(SB)
+#endif
TEXT runtime·fastrand(SB), NOSPLIT, $0-4
MOVD g_m(g), R4