bytes: improve Compare function on amd64 for large byte arrays

author Uttam C Pawar <uttam.c.pawar@intel.com>

Thu, 2 Jul 2015 18:43:46 +0000 (11:43 -0700)

committer Keith Randall <khr@golang.org>

Wed, 26 Aug 2015 03:52:20 +0000 (03:52 +0000)
author Uttam C Pawar <uttam.c.pawar@intel.com>
Thu, 2 Jul 2015 18:43:46 +0000 (11:43 -0700)
committer Keith Randall <khr@golang.org>
Wed, 26 Aug 2015 03:52:20 +0000 (03:52 +0000)
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go

index 6245e481805779e8665965053bde7cae4ce31e78..8df62fcc6aec1742a277d5fafd3d4a9705a63d30 100644 (file)
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -1255,3 +1255,34 @@ func BenchmarkRepeat(b *testing.B) {
                 Repeat([]byte("-"), 80)
         }
  }
+
+func benchmarkBytesCompare(b *testing.B, n int) { // times Compare on two separate, equal n-byte slices
+       var x = make([]byte, n)
+       var y = make([]byte, n)
+
+       for i := 0; i < n; i++ {
+               x[i] = 'a'
+       }
+
+       for i := 0; i < n; i++ {
+               y[i] = 'a' // same contents as x, so there is no early mismatch to stop on
+       }
+
+       b.ResetTimer() // keep the slice setup above out of the measurement
+       for i := 0; i < b.N; i++ {
+               Compare(x, y) // result intentionally discarded; only the call is timed
+       }
+}
+
+func BenchmarkBytesCompare1(b *testing.B)    { benchmarkBytesCompare(b, 1) } // sizes double from 1 up to 2048 bytes
+func BenchmarkBytesCompare2(b *testing.B)    { benchmarkBytesCompare(b, 2) }
+func BenchmarkBytesCompare4(b *testing.B)    { benchmarkBytesCompare(b, 4) }
+func BenchmarkBytesCompare8(b *testing.B)    { benchmarkBytesCompare(b, 8) }
+func BenchmarkBytesCompare16(b *testing.B)   { benchmarkBytesCompare(b, 16) }
+func BenchmarkBytesCompare32(b *testing.B)   { benchmarkBytesCompare(b, 32) }
+func BenchmarkBytesCompare64(b *testing.B)   { benchmarkBytesCompare(b, 64) }
+func BenchmarkBytesCompare128(b *testing.B)  { benchmarkBytesCompare(b, 128) }
+func BenchmarkBytesCompare256(b *testing.B)  { benchmarkBytesCompare(b, 256) }
+func BenchmarkBytesCompare512(b *testing.B)  { benchmarkBytesCompare(b, 512) }
+func BenchmarkBytesCompare1024(b *testing.B) { benchmarkBytesCompare(b, 1024) }
+func BenchmarkBytesCompare2048(b *testing.B) { benchmarkBytesCompare(b, 2048) }
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s

index 3b4ca4d012f88fdbc099e19fd3581bdf3356d674..ff2da3a858094e8e818799572f24bbf60fe8f852 100644 (file)
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1445,6 +1445,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
         CMPQ    R8, $8
         JB      small
  
+       CMPQ    R8, $63
+       JA      big_loop  // R8 > 63 (unsigned): take the 64-bytes-per-iteration path
  loop:
         CMPQ    R8, $16
         JBE     _0through16
@@ -1459,6 +1461,17 @@ loop:
         SUBQ    $16, R8
         JMP     loop
         
+diff64:  // reached from big_loop when the chunk at offset 48 differs
+       ADDQ    $48, SI  // point SI/DI at the 16-byte chunk that produced the mask in AX
+       ADDQ    $48, DI
+       JMP     diff16
+diff48:  // reached from big_loop when the chunk at offset 32 differs
+       ADDQ    $32, SI
+       ADDQ    $32, DI
+       JMP     diff16
+diff32:  // reached from big_loop when the chunk at offset 16 differs
+       ADDQ    $16, SI
+       ADDQ    $16, DI  // no jump needed: execution falls through into diff16
         // AX = bit mask of differences
  diff16:
         BSFQ    AX, BX  // index of first byte that differs
@@ -1545,6 +1558,43 @@ allsame:
         MOVQ    AX, (R9)
         RET
  
+       // big_loop compares 64 bytes per iteration as four 16-byte chunks; it is entered only when R8 >= 64.
+big_loop:
+       MOVOU   (SI), X0  // chunk 0: unaligned 16-byte load from SI
+       MOVOU   (DI), X1  // chunk 0: unaligned 16-byte load from DI
+       PCMPEQB X0, X1  // each byte of X1 becomes 0xff where the inputs match, 0x00 where they differ
+       PMOVMSKB X1, AX  // collect the top bit of every byte: AX bit i set = byte i matched
+       XORQ    $0xffff, AX  // invert the low 16 bits: AX bit i set = byte i differs
+       JNE     diff16  // non-zero mask: SI/DI already point at the differing chunk
+
+       MOVOU   16(SI), X0  // chunk 1: same sequence at offset 16
+       MOVOU   16(DI), X1
+       PCMPEQB X0, X1
+       PMOVMSKB X1, AX
+       XORQ    $0xffff, AX
+       JNE     diff32  // diff32 adds 16 to SI/DI before diff16
+
+       MOVOU   32(SI), X0  // chunk 2: same sequence at offset 32
+       MOVOU   32(DI), X1
+       PCMPEQB X0, X1
+       PMOVMSKB X1, AX
+       XORQ    $0xffff, AX
+       JNE     diff48  // diff48 adds 32 to SI/DI before diff16
+
+       MOVOU   48(SI), X0  // chunk 3: same sequence at offset 48
+       MOVOU   48(DI), X1
+       PCMPEQB X0, X1
+       PMOVMSKB X1, AX
+       XORQ    $0xffff, AX
+       JNE     diff64  // diff64 adds 48 to SI/DI before diff16
+
+       ADDQ    $64, SI  // all four chunks matched: step both pointers past them
+       ADDQ    $64, DI
+       SUBQ    $64, R8  // R8 appears to be the remaining byte count
+       CMPQ    R8, $64
+       JBE     loop  // 64 or fewer left: finish in the 16-byte loop. NOTE(review): entry sends exactly 64 to big_loop instead
+       JMP     big_loop
+
  TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
         MOVQ s+0(FP), SI
         MOVQ s_len+8(FP), BX
author	Uttam C Pawar <uttam.c.pawar@intel.com>
	Thu, 2 Jul 2015 18:43:46 +0000 (11:43 -0700)
committer	Keith Randall <khr@golang.org>
	Wed, 26 Aug 2015 03:52:20 +0000 (03:52 +0000)
src/bytes/bytes_test.go		patch | blob | history
src/runtime/asm_amd64.s		patch | blob | history