Memmove can use AVX, prefetches, and other optional instructions, so
only inline disjoint copies of small sizes (under 1024 bytes), where call overhead dominates.
Change-Id: Ice5e93deb11462217f7fb5fc350b703109bb4090
Reviewed-on: https://go-review.googlesource.com/112517
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Munday <mike.munday@ibm.com>
// have fast Move ops.
switch c.arch {
case "amd64", "amd64p32":
- return sz <= 16
+ return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
case "386", "ppc64", "ppc64le", "arm64":
return sz <= 8
case "s390x":
func moveDisjointStack() {
var s [256]byte
// s390x:-".*memmove"
+ // amd64:-".*memmove"
copy(s[:], x[:])
runtime.KeepAlive(&s)
}
-func moveDisjointArg(b *[256]byte) {
+func moveDisjointArg(b *[256]byte) {
var s [256]byte
// s390x:-".*memmove"
+ // amd64:-".*memmove"
copy(s[:], b[:])
runtime.KeepAlive(&s)
}
func moveDisjointNoOverlap(a *[256]byte) {
// s390x:-".*memmove"
+ // amd64:-".*memmove"
copy(a[:], a[128:])
}