Cypherpunks repositories - gostls13.git/commitdiff
runtime: remove AVX2less code in memclrNoHeapPointers for GOAMD64 >= 3
author vpachkov <slava.pach@gmail.com>
Mon, 29 Nov 2021 12:20:37 +0000 (15:20 +0300)
committer Keith Randall <khr@golang.org>
Thu, 31 Mar 2022 19:46:55 +0000 (19:46 +0000)
Optimize memclr by removing the simple-case loop along with the
runtime check, since AVX2 is guaranteed to be available when
compiling with GOAMD64 >= 3.

name             old speed      new speed      delta
Memclr/5-12      2.70GB/s ± 1%  2.73GB/s ± 1%     ~     (p=0.056 n=5+5)
Memclr/16-12     7.00GB/s ± 2%  7.03GB/s ± 1%     ~     (p=1.000 n=5+5)
Memclr/64-12     25.5GB/s ± 1%  25.5GB/s ± 1%     ~     (p=0.548 n=5+5)
Memclr/256-12    53.4GB/s ± 1%  52.7GB/s ± 2%     ~     (p=0.222 n=5+5)
Memclr/4096-12    109GB/s ± 1%   129GB/s ± 0%  +18.57%  (p=0.008 n=5+5)
Memclr/65536-12  75.2GB/s ± 2%  78.3GB/s ± 3%   +4.14%  (p=0.008 n=5+5)
Memclr/1M-12     53.5GB/s ± 2%  54.1GB/s ± 2%     ~     (p=0.310 n=5+5)
Memclr/4M-12     53.1GB/s ± 3%  52.9GB/s ± 2%     ~     (p=1.000 n=5+5)
Memclr/8M-12     44.6GB/s ± 3%  45.1GB/s ± 3%     ~     (p=0.310 n=5+5)
Memclr/16M-12    24.8GB/s ± 2%  24.2GB/s ± 2%     ~     (p=0.056 n=5+5)
Memclr/64M-12    38.3GB/s ± 1%  37.8GB/s ± 1%     ~     (p=0.056 n=5+5)
[Geo mean]       31.0GB/s       31.5GB/s        +1.78%

Change-Id: I6f3014f6338cb3b5a1b94503faa205f043fe2de8
Reviewed-on: https://go-review.googlesource.com/c/go/+/367494
Trust: Cherry Mui <cherryyz@google.com>
Trust: Daniel Martí <mvdan@mvdan.cc>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/dist/build.go
src/runtime/asm_amd64.h [new file with mode: 0644]
src/runtime/memclr_amd64.s

index d224cef2a82c346afc94c98abf66f98df26575a1..db2ac1f2a6e505ed422c9f76b839865b98ab0272 100644 (file)
@@ -732,6 +732,8 @@ func runInstall(pkg string, ch chan struct{}) {
                        pathf("%s/src/runtime/funcdata.h", goroot), 0)
                copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot),
                        pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0)
+               copyfile(pathf("%s/pkg/include/asm_amd64.h", goroot),
+                       pathf("%s/src/runtime/asm_amd64.h", goroot), 0)
        }
 
        // Generate any missing files; regenerate existing ones.
diff --git a/src/runtime/asm_amd64.h b/src/runtime/asm_amd64.h
new file mode 100644 (file)
index 0000000..49e0ee2
--- /dev/null
@@ -0,0 +1,14 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Define features that are guaranteed to be supported by setting the GOAMD64 variable.
+// If a feature is supported, there's no need to check it at runtime every time.
+
+#ifdef GOAMD64_v3
+#define hasAVX2
+#endif // GOAMD64_v3
+
+#ifdef GOAMD64_v4
+#define hasAVX2
+#endif // GOAMD64_v4
index 700bbd7b9b0e2d865953fcfd2879eff690f04d3e..26a6205e615b243cb7ccd1a22c60d074fbe48ab8 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "go_asm.h"
 #include "textflag.h"
+#include "asm_amd64.h"
 
 // See memclrNoHeapPointers Go doc for important implementation constraints.
 
@@ -39,6 +40,8 @@ tail:
        JBE     _65through128
        CMPQ    BX, $256
        JBE     _129through256
+
+#ifndef hasAVX2
        CMPB    internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
        JE loop_preheader_avx2
        // TODO: for really big clears, use MOVNTDQ, even without AVX2.
@@ -65,6 +68,7 @@ loop:
        CMPQ    BX, $256
        JAE     loop
        JMP     tail
+#endif
 
 loop_preheader_avx2:
        VPXOR Y0, Y0, Y0