Changes all cpu features to be detected and stored in bools in rt0_go.
Updates: #15403
Change-Id: I5a9961cdec789b331d09c44d86beb53833d5dc3e
Reviewed-on: https://go-review.googlesource.com/41950
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
// Install aes hash algorithm if we have the instructions we need
if (GOARCH == "386" || GOARCH == "amd64") &&
GOOS != "nacl" &&
- cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
- cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb)
- cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
+ support_aes && // AESENC
+ support_ssse3 && // PSHUFB
+ support_sse41 { // PINSR{D,Q}
useAeshash = true
algarray[alg_MEM32].hash = aeshash32
algarray[alg_MEM64].hash = aeshash64
MOVL $1, AX
CPUID
MOVL CX, DI // Move to global variable clobbers CX when generating PIC
- MOVL AX, runtime·cpuid_eax(SB)
+ MOVL AX, runtime·processorVersionInfo(SB)
MOVL DI, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
// Check for MMX support
- TESTL $(1<<23), DX // MMX
- JZ bad_proc
+ TESTL $(1<<23), DX // MMX
+ JZ bad_proc
+ TESTL $(1<<26), DX // SSE2
+ SETNE runtime·support_sse2(SB)
+
+ TESTL $(1<<9), DI // SSSE3
+ SETNE runtime·support_ssse3(SB)
+
+ TESTL $(1<<19), DI // SSE4.1
+ SETNE runtime·support_sse41(SB)
+
+ TESTL $(1<<20), DI // SSE4.2
+ SETNE runtime·support_sse42(SB)
+
+ TESTL $(1<<23), DI // POPCNT
+ SETNE runtime·support_popcnt(SB)
+
+ TESTL $(1<<25), DI // AES
+ SETNE runtime·support_aes(SB)
+
+ TESTL $(1<<27), DI // OSXSAVE
+ SETNE runtime·support_osxsave(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx will be set back to false later.
+ TESTL $(1<<28), DI // AVX
+ SETNE runtime·support_avx(SB)
+
+eax7:
// Load EAX=7/ECX=0 cpuid flags
CMPL SI, $7
- JLT nocpuinfo
+ JLT osavx
MOVL $7, AX
MOVL $0, CX
CPUID
MOVL BX, runtime·cpuid_ebx7(SB)
-nocpuinfo:
+ TESTL $(1<<3), BX // BMI1
+ SETNE runtime·support_bmi1(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx2 will be set back to false later.
+ TESTL $(1<<5), BX
+ SETNE runtime·support_avx2(SB)
+
+ TESTL $(1<<8), BX // BMI2
+ SETNE runtime·support_bmi2(SB)
+
+ TESTL $(1<<9), BX // ERMS
+ SETNE runtime·support_erms(SB)
+
+osavx:
+ // nacl does not support XGETBV to test
+ // for XMM and YMM OS support.
+#ifndef GOOS_nacl
+ CMPB runtime·support_osxsave(SB), $1
+ JNE noavx
+ MOVL $0, CX
+ // For XGETBV, OSXSAVE bit is required and sufficient
+ XGETBV
+ ANDL $6, AX
+ CMPL AX, $6 // Check for OS support of XMM and YMM registers.
+ JE nocpuinfo
+#endif
+noavx:
+ MOVB $0, runtime·support_avx(SB)
+ MOVB $0, runtime·support_avx2(SB)
+nocpuinfo:
// if there is an _cgo_init, call it to let it
// initialize and to set up GS. if not,
// we set up GS ourselves.
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
- TESTL $0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
- JEQ done
+ CMPB runtime·support_sse2(SB), $1
+ JNE done
CMPB runtime·lfenceBeforeRdtsc(SB), $1
JNE mfence
BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
hugeloop:
CMPL BX, $64
JB bigloop
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JE bigloop
+ CMPB runtime·support_sse2(SB), $1
+ JNE bigloop
MOVOU (SI), X0
MOVOU (DI), X1
MOVOU 16(SI), X2
JEQ allsame
CMPL BP, $4
JB small
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JE mediumloop
+ CMPB runtime·support_sse2(SB), $1
+ JNE mediumloop
largeloop:
CMPL BP, $16
JB mediumloop
MOVQ SP, (g_stack+stack_hi)(DI)
// find out information about the processor we're on
- MOVQ $0, AX
+ MOVL $0, AX
CPUID
- MOVQ AX, SI
- CMPQ AX, $0
+ MOVL AX, SI
+ CMPL AX, $0
JE nocpuinfo
// Figure out how to serialize RDTSC.
notintel:
// Load EAX=1 cpuid flags
- MOVQ $1, AX
+ MOVL $1, AX
CPUID
- MOVL AX, runtime·cpuid_eax(SB)
+ MOVL AX, runtime·processorVersionInfo(SB)
MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
+ TESTL $(1<<26), DX // SSE2
+ SETNE runtime·support_sse2(SB)
+
+ TESTL $(1<<9), CX // SSSE3
+ SETNE runtime·support_ssse3(SB)
+
+ TESTL $(1<<19), CX // SSE4.1
+ SETNE runtime·support_sse41(SB)
+
+ TESTL $(1<<20), CX // SSE4.2
+ SETNE runtime·support_sse42(SB)
+
+ TESTL $(1<<23), CX // POPCNT
+ SETNE runtime·support_popcnt(SB)
+
+ TESTL $(1<<25), CX // AES
+ SETNE runtime·support_aes(SB)
+
+ TESTL $(1<<27), CX // OSXSAVE
+ SETNE runtime·support_osxsave(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx will be set back to false later.
+ TESTL $(1<<28), CX // AVX
+ SETNE runtime·support_avx(SB)
+
+eax7:
// Load EAX=7/ECX=0 cpuid flags
- CMPQ SI, $7
- JLT no7
+ CMPL SI, $7
+ JLT osavx
MOVL $7, AX
MOVL $0, CX
CPUID
MOVL BX, runtime·cpuid_ebx7(SB)
-no7:
- // Detect AVX and AVX2 as per 14.7.1 Detection of AVX2 chapter of [1]
- // [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
- // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
- MOVL runtime·cpuid_ecx(SB), CX
- ANDL $0x18000000, CX // check for OSXSAVE and AVX bits
- CMPL CX, $0x18000000
- JNE noavx
- MOVL $0, CX
+
+ TESTL $(1<<3), BX // BMI1
+ SETNE runtime·support_bmi1(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx2 will be set back to false later.
+ TESTL $(1<<5), BX
+ SETNE runtime·support_avx2(SB)
+
+ TESTL $(1<<8), BX // BMI2
+ SETNE runtime·support_bmi2(SB)
+
+ TESTL $(1<<9), BX // ERMS
+ SETNE runtime·support_erms(SB)
+
+osavx:
+ CMPB runtime·support_osxsave(SB), $1
+ JNE noavx
+ MOVL $0, CX
// For XGETBV, OSXSAVE bit is required and sufficient
XGETBV
- ANDL $6, AX
- CMPL AX, $6 // Check for OS support of YMM registers
- JNE noavx
- MOVB $1, runtime·support_avx(SB)
- TESTL $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
- JEQ noavx2
- MOVB $1, runtime·support_avx2(SB)
- JMP testbmi1
+ ANDL $6, AX
+ CMPL AX, $6 // Check for OS support of XMM and YMM registers.
+ JE nocpuinfo
noavx:
- MOVB $0, runtime·support_avx(SB)
-noavx2:
- MOVB $0, runtime·support_avx2(SB)
-testbmi1:
- // Detect BMI1 and BMI2 extensions as per
- // 5.1.16.1 Detection of VEX-encoded GPR Instructions,
- // LZCNT and TZCNT, PREFETCHW chapter of [1]
- MOVB $0, runtime·support_bmi1(SB)
- TESTL $(1<<3), runtime·cpuid_ebx7(SB) // check for BMI1 bit
- JEQ testbmi2
- MOVB $1, runtime·support_bmi1(SB)
-testbmi2:
- MOVB $0, runtime·support_bmi2(SB)
- TESTL $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
- JEQ testpopcnt
- MOVB $1, runtime·support_bmi2(SB)
-testpopcnt:
- MOVB $0, runtime·support_popcnt(SB)
- TESTL $(1<<23), runtime·cpuid_ecx(SB) // check for POPCNT bit
- JEQ nocpuinfo
- MOVB $1, runtime·support_popcnt(SB)
-nocpuinfo:
-
+ MOVB $0, runtime·support_avx(SB)
+ MOVB $0, runtime·support_avx2(SB)
+
+nocpuinfo:
// if there is an _cgo_init, call it.
MOVQ _cgo_init(SB), AX
TESTQ AX, AX
VZEROUPPER
JMP success
sse42:
- MOVL runtime·cpuid_ecx(SB), CX
- ANDL $0x100000, CX
- JZ no_sse42
+ CMPB runtime·support_sse42(SB), $1
+ JNE no_sse42
CMPQ AX, $12
// PCMPESTRI is slower than normal compare,
// so using it makes sense only if we advance 4+ bytes per compare
MOVL SP, (g_stack+stack_hi)(DI)
// find out information about the processor we're on
- MOVQ $0, AX
+ MOVL $0, AX
CPUID
- CMPQ AX, $0
+ CMPL AX, $0
JE nocpuinfo
CMPL BX, $0x756E6547 // "Genu"
MOVB $1, runtime·isIntel(SB)
notintel:
- MOVQ $1, AX
+ // Load EAX=1 cpuid flags
+ MOVL $1, AX
CPUID
- MOVL AX, runtime·cpuid_eax(SB)
+ MOVL AX, runtime·processorVersionInfo(SB)
MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
-nocpuinfo:
-
+
+ TESTL $(1<<26), DX // SSE2
+ SETNE runtime·support_sse2(SB)
+
+ TESTL $(1<<9), CX // SSSE3
+ SETNE runtime·support_ssse3(SB)
+
+ TESTL $(1<<19), CX // SSE4.1
+ SETNE runtime·support_sse41(SB)
+
+ TESTL $(1<<20), CX // SSE4.2
+ SETNE runtime·support_sse42(SB)
+
+ TESTL $(1<<23), CX // POPCNT
+ SETNE runtime·support_popcnt(SB)
+
+ TESTL $(1<<25), CX // AES
+ SETNE runtime·support_aes(SB)
+
+ TESTL $(1<<27), CX // OSXSAVE
+ SETNE runtime·support_osxsave(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx will be set back to false later.
+ TESTL $(1<<28), CX // AVX
+ SETNE runtime·support_avx(SB)
+
+eax7:
+ // Load EAX=7/ECX=0 cpuid flags
+ CMPL SI, $7
+ JLT osavx
+ MOVL $7, AX
+ MOVL $0, CX
+ CPUID
+ MOVL BX, runtime·cpuid_ebx7(SB)
+
+ TESTL $(1<<3), BX // BMI1
+ SETNE runtime·support_bmi1(SB)
+
+ // If OS support for XMM and YMM is not present
+ // support_avx2 will be set back to false later.
+ TESTL $(1<<5), BX
+ SETNE runtime·support_avx2(SB)
+
+ TESTL $(1<<8), BX // BMI2
+ SETNE runtime·support_bmi2(SB)
+
+ TESTL $(1<<9), BX // ERMS
+ SETNE runtime·support_erms(SB)
+
+osavx:
+ // nacl does not support XGETBV to test
+ // for XMM and YMM OS support.
+#ifndef GOOS_nacl
+ CMPB runtime·support_osxsave(SB), $1
+ JNE noavx
+ MOVL $0, CX
+ // For XGETBV, OSXSAVE bit is required and sufficient
+ XGETBV
+ ANDL $6, AX
+ CMPL AX, $6 // Check for OS support of XMM and YMM registers.
+ JE nocpuinfo
+#endif
+noavx:
+ MOVB $0, runtime·support_avx(SB)
+ MOVB $0, runtime·support_avx2(SB)
+
+nocpuinfo:
+
needtls:
LEAL runtime·m0+m_tls(SB), DI
CALL runtime·settls(SB)
func init() {
// Let's remove stepping and reserved fields
- processorVersionInfo := cpuid_eax & 0x0FFF3FF0
+ processor := processorVersionInfo & 0x0FFF3FF0
isIntelBridgeFamily := isIntel &&
- (processorVersionInfo == 0x206A0 ||
- processorVersionInfo == 0x206D0 ||
- processorVersionInfo == 0x306A0 ||
- processorVersionInfo == 0x306E0)
+ processor == 0x206A0 ||
+ processor == 0x206D0 ||
+ processor == 0x306A0 ||
+ processor == 0x306E0
useAVXmemmove = support_avx && !isIntelBridgeFamily
}
JBE _5through8
CMPL BX, $16
JBE _9through16
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JEQ nosse2
+ CMPB runtime·support_sse2(SB), $1
+ JNE nosse2
PXOR X0, X0
CMPL BX, $32
JBE _17through32
JBE move_5through8
CMPL BX, $16
JBE move_9through16
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JEQ nosse2
+ CMPB runtime·support_sse2(SB), $1
+ JNE nosse2
CMPL BX, $32
JBE move_17through32
CMPL BX, $64
*/
forward:
// If REP MOVSB isn't fast, don't use it
- TESTL $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
- JEQ fwdBy4
+ CMPB runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
+ JNE fwdBy4
// Check alignment
MOVL SI, AX
JLS move_256through2048
// If REP MOVSB isn't fast, don't use it
- TESTL $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
- JEQ fwdBy8
+ CMPB runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
+ JNE fwdBy8
// Check alignment
MOVL SI, AX
// Information about what cpu features are available.
// Set on startup in asm_{386,amd64,amd64p32}.s.
- cpuid_eax uint32
- cpuid_ecx uint32
- cpuid_edx uint32
- cpuid_ebx7 uint32 // not set on amd64p32
- isIntel bool
- lfenceBeforeRdtsc bool
- support_avx bool
- support_avx2 bool
- support_bmi1 bool
- support_bmi2 bool
- support_popcnt bool
+ // Packages outside the runtime should not use these
+ // as they are not an external api.
+ processorVersionInfo uint32
+ isIntel bool
+ lfenceBeforeRdtsc bool
+ support_aes bool
+ support_avx bool
+ support_avx2 bool
+ support_bmi1 bool
+ support_bmi2 bool
+ support_erms bool
+ support_osxsave bool
+ support_popcnt bool
+ support_sse2 bool
+ support_sse41 bool
+ support_sse42 bool
+ support_ssse3 bool
+
+ // TODO(moehrmann) delete below variables once external
+ // packages have their dependencies on these removed.
+ cpuid_ecx uint32
+ cpuid_edx uint32
+ cpuid_ebx7 uint32 // not set on amd64p32
goarm uint8 // set by cmd/link on arm systems
framepointer_enabled bool // set by cmd/link