From: vpachkov Date: Mon, 29 Nov 2021 12:20:37 +0000 (+0300) Subject: runtime: remove AVX2less code in memclrNoHeapPointers for GOAMD64 >= 3 X-Git-Tag: go1.19beta1~834 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=12acf9b0f009305eefc71d4cee8808cc244e86aa;p=gostls13.git runtime: remove AVX2less code in memclrNoHeapPointers for GOAMD64 >= 3 Optimize memclr by removing simple case loop along with the runtime check since AVX2 is guaranteed to be available when compiling with GOAMD64 >= 3 name old speed new speed delta Memclr/5-12 2.70GB/s ± 1% 2.73GB/s ± 1% ~ (p=0.056 n=5+5) Memclr/16-12 7.00GB/s ± 2% 7.03GB/s ± 1% ~ (p=1.000 n=5+5) Memclr/64-12 25.5GB/s ± 1% 25.5GB/s ± 1% ~ (p=0.548 n=5+5) Memclr/256-12 53.4GB/s ± 1% 52.7GB/s ± 2% ~ (p=0.222 n=5+5) Memclr/4096-12 109GB/s ± 1% 129GB/s ± 0% +18.57% (p=0.008 n=5+5) Memclr/65536-12 75.2GB/s ± 2% 78.3GB/s ± 3% +4.14% (p=0.008 n=5+5) Memclr/1M-12 53.5GB/s ± 2% 54.1GB/s ± 2% ~ (p=0.310 n=5+5) Memclr/4M-12 53.1GB/s ± 3% 52.9GB/s ± 2% ~ (p=1.000 n=5+5) Memclr/8M-12 44.6GB/s ± 3% 45.1GB/s ± 3% ~ (p=0.310 n=5+5) Memclr/16M-12 24.8GB/s ± 2% 24.2GB/s ± 2% ~ (p=0.056 n=5+5) Memclr/64M-12 38.3GB/s ± 1% 37.8GB/s ± 1% ~ (p=0.056 n=5+5) [Geo mean] 31.0GB/s 31.5GB/s +1.78% Change-Id: I6f3014f6338cb3b5a1b94503faa205f043fe2de8 Reviewed-on: https://go-review.googlesource.com/c/go/+/367494 Trust: Cherry Mui Trust: Daniel Martí Run-TryBot: Daniel Martí TryBot-Result: Gopher Robot Reviewed-by: Keith Randall --- diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go index d224cef2a8..db2ac1f2a6 100644 --- a/src/cmd/dist/build.go +++ b/src/cmd/dist/build.go @@ -732,6 +732,8 @@ func runInstall(pkg string, ch chan struct{}) { pathf("%s/src/runtime/funcdata.h", goroot), 0) copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot), pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0) + copyfile(pathf("%s/pkg/include/asm_amd64.h", goroot), + pathf("%s/src/runtime/asm_amd64.h", goroot), 0) } // Generate any missing files; regenerate existing ones. diff --git a/src/runtime/asm_amd64.h b/src/runtime/asm_amd64.h new file mode 100644 index 0000000000..49e0ee2323 --- /dev/null +++ b/src/runtime/asm_amd64.h @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Define features that are guaranteed to be supported by setting the AMD64 variable. +// If a feature is supported, there's no need to check it at runtime every time. + +#ifdef GOAMD64_v3 +#define hasAVX2 +#endif + +#ifdef GOAMD64_v4 +#define hasAVX2 +#endif diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s index 700bbd7b9b..26a6205e61 100644 --- a/src/runtime/memclr_amd64.s +++ b/src/runtime/memclr_amd64.s @@ -6,6 +6,7 @@ #include "go_asm.h" #include "textflag.h" +#include "asm_amd64.h" // See memclrNoHeapPointers Go doc for important implementation constraints. @@ -39,6 +40,8 @@ tail: JBE _65through128 CMPQ BX, $256 JBE _129through256 + +#ifndef hasAVX2 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JE loop_preheader_avx2 // TODO: for really big clears, use MOVNTDQ, even without AVX2. @@ -65,6 +68,7 @@ loop: CMPQ BX, $256 JAE loop JMP tail +#endif loop_preheader_avx2: VPXOR Y0, Y0, Y0