Repeat([]byte("-"), 80)
}
}
+
+func benchmarkBytesCompare(b *testing.B, n int) {
+ var x = make([]byte, n)
+ var y = make([]byte, n)
+
+ for i := 0; i < n; i++ {
+ x[i] = 'a'
+ }
+
+ for i := 0; i < n; i++ {
+ y[i] = 'a'
+ }
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ Compare(x, y)
+ }
+}
+
+// The BenchmarkBytesCompareN entry points each run benchmarkBytesCompare
+// with slices of N bytes, for N doubling from 1 up to 2048.
+
+func BenchmarkBytesCompare1(b *testing.B) {
+	benchmarkBytesCompare(b, 1)
+}
+
+func BenchmarkBytesCompare2(b *testing.B) {
+	benchmarkBytesCompare(b, 2)
+}
+
+func BenchmarkBytesCompare4(b *testing.B) {
+	benchmarkBytesCompare(b, 4)
+}
+
+func BenchmarkBytesCompare8(b *testing.B) {
+	benchmarkBytesCompare(b, 8)
+}
+
+func BenchmarkBytesCompare16(b *testing.B) {
+	benchmarkBytesCompare(b, 16)
+}
+
+func BenchmarkBytesCompare32(b *testing.B) {
+	benchmarkBytesCompare(b, 32)
+}
+
+func BenchmarkBytesCompare64(b *testing.B) {
+	benchmarkBytesCompare(b, 64)
+}
+
+func BenchmarkBytesCompare128(b *testing.B) {
+	benchmarkBytesCompare(b, 128)
+}
+
+func BenchmarkBytesCompare256(b *testing.B) {
+	benchmarkBytesCompare(b, 256)
+}
+
+func BenchmarkBytesCompare512(b *testing.B) {
+	benchmarkBytesCompare(b, 512)
+}
+
+func BenchmarkBytesCompare1024(b *testing.B) {
+	benchmarkBytesCompare(b, 1024)
+}
+
+func BenchmarkBytesCompare2048(b *testing.B) {
+	benchmarkBytesCompare(b, 2048)
+}
CMPQ R8, $8
JB small
+ CMPQ R8, $63
+ JA big_loop
loop:
CMPQ R8, $16
JBE _0through16
SUBQ $16, R8
JMP loop
+diff64:
+ ADDQ $48, SI
+ ADDQ $48, DI
+ JMP diff16
+diff48:
+ ADDQ $32, SI
+ ADDQ $32, DI
+ JMP diff16
+diff32:
+ ADDQ $16, SI
+ ADDQ $16, DI
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
MOVQ AX, (R9)
RET
+ // Compares 64 bytes per iteration in four 16-byte chunks; only entered while at least 64 bytes remain (R8 >= 64).
+big_loop:
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX
+ JNE diff16
+
+ MOVOU 16(SI), X0
+ MOVOU 16(DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX
+ JNE diff32
+
+ MOVOU 32(SI), X0
+ MOVOU 32(DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX
+ JNE diff48
+
+ MOVOU 48(SI), X0
+ MOVOU 48(DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX
+ JNE diff64
+
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, R8
+ CMPQ R8, $64
+ JBE loop
+ JMP big_loop
+
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), BX