Cypherpunks repositories - gostls13.git/commitdiff
[release-branch.go1.14] runtime: guard VZEROUPPER on CPU feature
author: Cherry Zhang <cherryyz@google.com>
Wed, 26 Feb 2020 01:30:37 +0000 (20:30 -0500)
committer: Carlos Amedee <carlos@golang.org>
Mon, 2 Mar 2020 22:30:16 +0000 (22:30 +0000)
In CL 219131 we inserted a VZEROUPPER instruction on darwin/amd64.
The instruction is not available on pre-AVX machines. Guard it
with CPU feature.

Updates #37459.
Fixes #37478.

Change-Id: I9a064df277d091be4ee594eda5c7fd8ee323102b
Reviewed-on: https://go-review.googlesource.com/c/go/+/221057
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
(cherry picked from commit c46ffdd2eca339918ed30b6ba9d4715ba769d35d)
Reviewed-on: https://go-review.googlesource.com/c/go/+/221058
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>

src/runtime/cpuflags.go
src/runtime/mkpreempt.go
src/runtime/preempt_amd64.s

index 94f9331d15acad0b663e6d380eb542835376df2c..4bd894d984b813c0d9c1d3aa32ed95992d756330 100644 (file)
@@ -11,6 +11,7 @@ import (
 
 // Offsets into internal/cpu records for use in assembly.
 const (
+       offsetX86HasAVX  = unsafe.Offsetof(cpu.X86.HasAVX)
        offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
        offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
        offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
index 31b6f5cbac3ad1b9319dcb4fe1d3840dfcf12445..c26406e55f1af971ffc554fd5a98f375e2b097ca 100644 (file)
@@ -244,23 +244,26 @@ func genAMD64() {
 
        // TODO: MXCSR register?
 
+       p("PUSHQ BP")
+       p("MOVQ SP, BP")
+       p("// Save flags before clobbering them")
+       p("PUSHFQ")
+       p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
+       p("ADJSP $%d", l.stack)
+       p("// But vet doesn't know ADJSP, so suppress vet stack checking")
+       p("NOP SP")
+
        // Apparently, the signal handling code path in darwin kernel leaves
        // the upper bits of Y registers in a dirty state, which causes
        // many SSE operations (128-bit and narrower) become much slower.
        // Clear the upper bits to get to a clean state. See issue #37174.
        // It is safe here as Go code don't use the upper bits of Y registers.
        p("#ifdef GOOS_darwin")
+       p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
+       p("JE 2(PC)")
        p("VZEROUPPER")
        p("#endif")
 
-       p("PUSHQ BP")
-       p("MOVQ SP, BP")
-       p("// Save flags before clobbering them")
-       p("PUSHFQ")
-       p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
-       p("ADJSP $%d", l.stack)
-       p("// But vet doesn't know ADJSP, so suppress vet stack checking")
-       p("NOP SP")
        l.save()
        p("CALL ·asyncPreempt2(SB)")
        l.restore()
index 0f2fd7d8dd4d5b0fbf8b8c2f04253fe3c1333aea..4765e9f448aa9b7f7e025897235e3638a49e9a03 100644 (file)
@@ -4,9 +4,6 @@
 #include "textflag.h"
 
 TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
-       #ifdef GOOS_darwin
-       VZEROUPPER
-       #endif
        PUSHQ BP
        MOVQ SP, BP
        // Save flags before clobbering them
@@ -15,6 +12,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
        ADJSP $368
        // But vet doesn't know ADJSP, so suppress vet stack checking
        NOP SP
+       #ifdef GOOS_darwin
+       CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
+       JE 2(PC)
+       VZEROUPPER
+       #endif
        MOVQ AX, 0(SP)
        MOVQ CX, 8(SP)
        MOVQ DX, 16(SP)