runtime: amd64, use 4-byte ops for memmove of 4 bytes

author Keith Randall <khr@golang.org>

Mon, 23 Jan 2017 16:22:10 +0000 (08:22 -0800)

committer Keith Randall <khr@golang.org>

Mon, 23 Jan 2017 19:39:22 +0000 (19:39 +0000)
author Keith Randall <khr@golang.org>
Mon, 23 Jan 2017 16:22:10 +0000 (08:22 -0800)
committer Keith Randall <khr@golang.org>
Mon, 23 Jan 2017 19:39:22 +0000 (19:39 +0000)
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s

index 464f5fdc1b48b549030f885cb203291c92b2890c..c2286d3edd241ca1801565745d9829110677c432 100644 (file)
--- a/src/runtime/memmove_amd64.s
+++ b/src/runtime/memmove_amd64.s
@@ -146,10 +146,16 @@ move_1or2:
  move_0:
         RET
  move_3or4:
+       CMPQ    BX, $4
+       JB      move_3
+       MOVL    (SI), AX
+       MOVL    AX, (DI)
+       RET
+move_3:
         MOVW    (SI), AX
-       MOVW    -2(SI)(BX*1), CX
+       MOVB    2(SI), CX
         MOVW    AX, (DI)
-       MOVW    CX, -2(DI)(BX*1)
+       MOVB    CX, 2(DI)
         RET
  move_5through7:
         MOVL    (SI), AX
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go

index dbfa284c28bc6020592b2d33afdc8207d7fef4a1..74b8753b5f78f4f22f53346c2ce0b3f9113f2980 100644 (file)
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -6,6 +6,7 @@ package runtime_test
  
  import (
         "crypto/rand"
+       "encoding/binary"
         "fmt"
         "internal/race"
         . "runtime"
@@ -447,3 +448,22 @@ func BenchmarkCopyFat1024(b *testing.B) {
                 _ = y
         }
  }
+
+func BenchmarkIssue18740(b *testing.B) {
+       // This tests that memmove uses one 4-byte load/store to move 4 bytes.
+       // It used to do two 2-byte load/stores, which leads to a pipeline stall
+       // when we try to read the result with one 4-byte load.
+       var buf [4]byte
+       for j := 0; j < b.N; j++ {
+               s := uint32(0)
+               for i := 0; i < 4096; i += 4 {
+                       copy(buf[:], g[i:])
+                       s += binary.LittleEndian.Uint32(buf[:])
+               }
+               sink = uint64(s)
+       }
+}
+
+// TODO: add 2-byte and 8-byte benchmarks as well.
+
+var g [4096]byte
author	Keith Randall <khr@golang.org>
	Mon, 23 Jan 2017 16:22:10 +0000 (08:22 -0800)
committer	Keith Randall <khr@golang.org>
	Mon, 23 Jan 2017 19:39:22 +0000 (19:39 +0000)
src/runtime/memmove_amd64.s		patch \| blob \| history
src/runtime/memmove_test.go		patch \| blob \| history