]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: refactor cpu feature detection for 386 & amd64
authorMartin Möhrmann <moehrmann@google.com>
Thu, 27 Apr 2017 06:30:27 +0000 (08:30 +0200)
committerMartin Möhrmann <moehrmann@google.com>
Mon, 1 May 2017 20:46:03 +0000 (20:46 +0000)
Changes all cpu features to be detected and stored in bools in rt0_go.

Updates: #15403

Change-Id: I5a9961cdec789b331d09c44d86beb53833d5dc3e
Reviewed-on: https://go-review.googlesource.com/41950
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/runtime/alg.go
src/runtime/asm_386.s
src/runtime/asm_amd64.s
src/runtime/asm_amd64p32.s
src/runtime/cpuflags_amd64.go
src/runtime/memclr_386.s
src/runtime/memmove_386.s
src/runtime/memmove_amd64.s
src/runtime/runtime2.go

index 504be61cd01e5296a8536d4e14b8f06967012fcd..8d388da5a2ba83c19e2f28e03e8ad2bbe0e7c755 100644 (file)
@@ -283,9 +283,9 @@ func alginit() {
        // Install aes hash algorithm if we have the instructions we need
        if (GOARCH == "386" || GOARCH == "amd64") &&
                GOOS != "nacl" &&
-               cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
-               cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb)
-               cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
+               support_aes && // AESENC
+               support_ssse3 && // PSHUFB
+               support_sse41 { // PINSR{D,Q}
                useAeshash = true
                algarray[alg_MEM32].hash = aeshash32
                algarray[alg_MEM64].hash = aeshash64
index eaf8c935ad363131a829ca69924b1d1e85678235..dc5db91ea856eb7e803f5111636df2c274f0c2fe 100644 (file)
@@ -75,24 +75,81 @@ notintel:
        MOVL    $1, AX
        CPUID
        MOVL    CX, DI // Move to global variable clobbers CX when generating PIC
-       MOVL    AX, runtime·cpuid_eax(SB)
+       MOVL    AX, runtime·processorVersionInfo(SB)
        MOVL    DI, runtime·cpuid_ecx(SB)
        MOVL    DX, runtime·cpuid_edx(SB)
 
        // Check for MMX support
-       TESTL   $(1<<23), DX    // MMX
-       JZ      bad_proc
+       TESTL   $(1<<23), DX // MMX
+       JZ      bad_proc
 
+       TESTL   $(1<<26), DX // SSE2
+       SETNE   runtime·support_sse2(SB)
+
+       TESTL   $(1<<9), DI // SSSE3
+       SETNE   runtime·support_ssse3(SB)
+
+       TESTL   $(1<<19), DI // SSE4.1
+       SETNE   runtime·support_sse41(SB)
+
+       TESTL   $(1<<20), DI // SSE4.2
+       SETNE   runtime·support_sse42(SB)
+
+       TESTL   $(1<<23), DI // POPCNT
+       SETNE   runtime·support_popcnt(SB)
+
+       TESTL   $(1<<25), DI // AES
+       SETNE   runtime·support_aes(SB)
+
+       TESTL   $(1<<27), DI // OSXSAVE
+       SETNE   runtime·support_osxsave(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx will be set back to false later.
+       TESTL   $(1<<28), DI // AVX
+       SETNE   runtime·support_avx(SB)
+
+eax7:
        // Load EAX=7/ECX=0 cpuid flags
        CMPL    SI, $7
-       JLT     nocpuinfo
+       JLT     osavx
        MOVL    $7, AX
        MOVL    $0, CX
        CPUID
        MOVL    BX, runtime·cpuid_ebx7(SB)
 
-nocpuinfo:     
+       TESTL   $(1<<3), BX // BMI1
+       SETNE   runtime·support_bmi1(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx2 will be set back to false later.
+       TESTL   $(1<<5), BX
+       SETNE   runtime·support_avx2(SB)
+
+       TESTL   $(1<<8), BX // BMI2
+       SETNE   runtime·support_bmi2(SB)
+
+       TESTL   $(1<<9), BX // ERMS
+       SETNE   runtime·support_erms(SB)
+
+osavx:
+       // nacl does not support XGETBV to test
+       // for XMM and YMM OS support.
+#ifndef GOOS_nacl
+       CMPB    runtime·support_osxsave(SB), $1
+       JNE     noavx
+       MOVL    $0, CX
+       // For XGETBV, OSXSAVE bit is required and sufficient
+       XGETBV
+       ANDL    $6, AX
+       CMPL    AX, $6 // Check for OS support of XMM and YMM registers.
+       JE nocpuinfo
+#endif
+noavx:
+       MOVB $0, runtime·support_avx(SB)
+       MOVB $0, runtime·support_avx2(SB)
 
+nocpuinfo:
        // if there is an _cgo_init, call it to let it
        // initialize and to set up GS.  if not,
        // we set up GS ourselves.
@@ -803,8 +860,8 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
 
 // func cputicks() int64
 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
-       TESTL   $0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
-       JEQ     done
+       CMPB    runtime·support_sse2(SB), $1
+       JNE     done
        CMPB    runtime·lfenceBeforeRdtsc(SB), $1
        JNE     mfence
        BYTE    $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
@@ -1311,8 +1368,8 @@ TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
 hugeloop:
        CMPL    BX, $64
        JB      bigloop
-       TESTL   $0x4000000, runtime·cpuid_edx(SB) // check for sse2
-       J     bigloop
+       CMPB    runtime·support_sse2(SB), $1
+       JNE     bigloop
        MOVOU   (SI), X0
        MOVOU   (DI), X1
        MOVOU   16(SI), X2
@@ -1455,8 +1512,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
        JEQ     allsame
        CMPL    BP, $4
        JB      small
-       TESTL   $0x4000000, runtime·cpuid_edx(SB) // check for sse2
-       J     mediumloop
+       CMPB    runtime·support_sse2(SB), $1
+       JNE     mediumloop
 largeloop:
        CMPL    BP, $16
        JB      mediumloop
index 65bbf63bf10a383439a2a4776d5944af1be0d719..0dc9a9c5427f09232358b7fd7f6ae5e0479c053a 100644 (file)
@@ -26,10 +26,10 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
        MOVQ    SP, (g_stack+stack_hi)(DI)
 
        // find out information about the processor we're on
-       MOVQ    $0, AX
+       MOVL    $0, AX
        CPUID
-       MOVQ    AX, SI
-       CMPQ    AX, $0
+       MOVL    AX, SI
+       CMPL    AX, $0
        JE      nocpuinfo
 
        // Figure out how to serialize RDTSC.
@@ -46,62 +46,75 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 notintel:
 
        // Load EAX=1 cpuid flags
-       MOVQ    $1, AX
+       MOVL    $1, AX
        CPUID
-       MOVL    AX, runtime·cpuid_eax(SB)
+       MOVL    AX, runtime·processorVersionInfo(SB)
        MOVL    CX, runtime·cpuid_ecx(SB)
        MOVL    DX, runtime·cpuid_edx(SB)
 
+       TESTL   $(1<<26), DX // SSE2
+       SETNE   runtime·support_sse2(SB)
+
+       TESTL   $(1<<9), CX // SSSE3
+       SETNE   runtime·support_ssse3(SB)
+
+       TESTL   $(1<<19), CX // SSE4.1
+       SETNE   runtime·support_sse41(SB)
+
+       TESTL   $(1<<20), CX // SSE4.2
+       SETNE   runtime·support_sse42(SB)
+
+       TESTL   $(1<<23), CX // POPCNT
+       SETNE   runtime·support_popcnt(SB)
+
+       TESTL   $(1<<25), CX // AES
+       SETNE   runtime·support_aes(SB)
+
+       TESTL   $(1<<27), CX // OSXSAVE
+       SETNE   runtime·support_osxsave(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx will be set back to false later.
+       TESTL   $(1<<28), CX // AVX
+       SETNE   runtime·support_avx(SB)
+
+eax7:
        // Load EAX=7/ECX=0 cpuid flags
-       CMPQ    SI, $7
-       JLT     no7
+       CMPL    SI, $7
+       JLT     osavx
        MOVL    $7, AX
        MOVL    $0, CX
        CPUID
        MOVL    BX, runtime·cpuid_ebx7(SB)
-no7:
-       // Detect AVX and AVX2 as per 14.7.1  Detection of AVX2 chapter of [1]
-       // [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
-       // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
-       MOVL    runtime·cpuid_ecx(SB), CX
-       ANDL    $0x18000000, CX // check for OSXSAVE and AVX bits
-       CMPL    CX, $0x18000000
-       JNE     noavx
-       MOVL    $0, CX
+
+       TESTL   $(1<<3), BX // BMI1
+       SETNE   runtime·support_bmi1(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx2 will be set back to false later.
+       TESTL   $(1<<5), BX
+       SETNE   runtime·support_avx2(SB)
+
+       TESTL   $(1<<8), BX // BMI2
+       SETNE   runtime·support_bmi2(SB)
+
+       TESTL   $(1<<9), BX // ERMS
+       SETNE   runtime·support_erms(SB)
+
+osavx:
+       CMPB    runtime·support_osxsave(SB), $1
+       JNE     noavx
+       MOVL    $0, CX
        // For XGETBV, OSXSAVE bit is required and sufficient
        XGETBV
-       ANDL    $6, AX
-       CMPL    AX, $6 // Check for OS support of YMM registers
-       JNE     noavx
-       MOVB    $1, runtime·support_avx(SB)
-       TESTL   $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
-       JEQ     noavx2
-       MOVB    $1, runtime·support_avx2(SB)
-       JMP     testbmi1
+       ANDL    $6, AX
+       CMPL    AX, $6 // Check for OS support of XMM and YMM registers.
+       JE nocpuinfo
 noavx:
-       MOVB    $0, runtime·support_avx(SB)
-noavx2:
-       MOVB    $0, runtime·support_avx2(SB)
-testbmi1:
-       // Detect BMI1 and BMI2 extensions as per
-       // 5.1.16.1 Detection of VEX-encoded GPR Instructions,
-       //   LZCNT and TZCNT, PREFETCHW chapter of [1]
-       MOVB    $0, runtime·support_bmi1(SB)
-       TESTL   $(1<<3), runtime·cpuid_ebx7(SB) // check for BMI1 bit
-       JEQ     testbmi2
-       MOVB    $1, runtime·support_bmi1(SB)
-testbmi2:
-       MOVB    $0, runtime·support_bmi2(SB)
-       TESTL   $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
-       JEQ     testpopcnt
-       MOVB    $1, runtime·support_bmi2(SB)
-testpopcnt:
-       MOVB    $0, runtime·support_popcnt(SB)
-       TESTL   $(1<<23), runtime·cpuid_ecx(SB) // check for POPCNT bit
-       JEQ     nocpuinfo
-       MOVB    $1, runtime·support_popcnt(SB)
-nocpuinfo:     
-       
+       MOVB $0, runtime·support_avx(SB)
+       MOVB $0, runtime·support_avx2(SB)
+
+nocpuinfo:
        // if there is an _cgo_init, call it.
        MOVQ    _cgo_init(SB), AX
        TESTQ   AX, AX
@@ -1942,9 +1955,8 @@ success_avx2:
        VZEROUPPER
        JMP success
 sse42:
-       MOVL runtime·cpuid_ecx(SB), CX
-       ANDL $0x100000, CX
-       JZ no_sse42
+       CMPB runtime·support_sse42(SB), $1
+       JNE no_sse42
        CMPQ AX, $12
        // PCMPESTRI is slower than normal compare,
        // so using it makes sense only if we advance 4+ bytes per compare
index 14c2213384323e5c146a8520c9c671b9c870a775..e97674cc8426c5e23bc28f721bc41a3f84002a25 100644 (file)
@@ -28,9 +28,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
        MOVL    SP, (g_stack+stack_hi)(DI)
 
        // find out information about the processor we're on
-       MOVQ    $0, AX
+       MOVL    $0, AX
        CPUID
-       CMPQ    AX, $0
+       CMPL    AX, $0
        JE      nocpuinfo
 
        CMPL    BX, $0x756E6547  // "Genu"
@@ -42,13 +42,81 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
        MOVB    $1, runtime·isIntel(SB)
 notintel:
 
-       MOVQ    $1, AX
+       // Load EAX=1 cpuid flags
+       MOVL    $1, AX
        CPUID
-       MOVL    AX, runtime·cpuid_eax(SB)
+       MOVL    AX, runtime·processorVersionInfo(SB)
        MOVL    CX, runtime·cpuid_ecx(SB)
        MOVL    DX, runtime·cpuid_edx(SB)
-nocpuinfo:     
-       
+
+       TESTL   $(1<<26), DX // SSE2
+       SETNE   runtime·support_sse2(SB)
+
+       TESTL   $(1<<9), CX // SSSE3
+       SETNE   runtime·support_ssse3(SB)
+
+       TESTL   $(1<<19), CX // SSE4.1
+       SETNE   runtime·support_sse41(SB)
+
+       TESTL   $(1<<20), CX // SSE4.2
+       SETNE   runtime·support_sse42(SB)
+
+       TESTL   $(1<<23), CX // POPCNT
+       SETNE   runtime·support_popcnt(SB)
+
+       TESTL   $(1<<25), CX // AES
+       SETNE   runtime·support_aes(SB)
+
+       TESTL   $(1<<27), CX // OSXSAVE
+       SETNE   runtime·support_osxsave(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx will be set back to false later.
+       TESTL   $(1<<28), CX // AVX
+       SETNE   runtime·support_avx(SB)
+
+eax7:
+       // Load EAX=7/ECX=0 cpuid flags
+       CMPL    SI, $7
+       JLT     osavx
+       MOVL    $7, AX
+       MOVL    $0, CX
+       CPUID
+       MOVL    BX, runtime·cpuid_ebx7(SB)
+
+       TESTL   $(1<<3), BX // BMI1
+       SETNE   runtime·support_bmi1(SB)
+
+       // If OS support for XMM and YMM is not present
+       // support_avx2 will be set back to false later.
+       TESTL   $(1<<5), BX
+       SETNE   runtime·support_avx2(SB)
+
+       TESTL   $(1<<8), BX // BMI2
+       SETNE   runtime·support_bmi2(SB)
+
+       TESTL   $(1<<9), BX // ERMS
+       SETNE   runtime·support_erms(SB)
+
+osavx:
+       // nacl does not support XGETBV to test
+       // for XMM and YMM OS support.
+#ifndef GOOS_nacl
+       CMPB    runtime·support_osxsave(SB), $1
+       JNE     noavx
+       MOVL    $0, CX
+       // For XGETBV, OSXSAVE bit is required and sufficient
+       XGETBV
+       ANDL    $6, AX
+       CMPL    AX, $6 // Check for OS support of XMM and YMM registers.
+       JE nocpuinfo
+#endif
+noavx:
+       MOVB $0, runtime·support_avx(SB)
+       MOVB $0, runtime·support_avx2(SB)
+
+nocpuinfo:
+
 needtls:
        LEAL    runtime·m0+m_tls(SB), DI
        CALL    runtime·settls(SB)
index 3a463487a9f6f09ea18e5b68a56671c8820b114e..3e408dae5f1a5d4bfba4cd7f81b966eb29992e7e 100644 (file)
@@ -8,13 +8,13 @@ var useAVXmemmove bool
 
 func init() {
        // Let's remove stepping and reserved fields
-       processorVersionInfo := cpuid_eax & 0x0FFF3FF0
+       processor := processorVersionInfo & 0x0FFF3FF0
 
        isIntelBridgeFamily := isIntel &&
-               (processorVersionInfo == 0x206A0 ||
-                       processorVersionInfo == 0x206D0 ||
-                       processorVersionInfo == 0x306A0 ||
-                       processorVersionInfo == 0x306E0)
+               processor == 0x206A0 ||
+               processor == 0x206D0 ||
+               processor == 0x306A0 ||
+               processor == 0x306E0
 
        useAVXmemmove = support_avx && !isIntelBridgeFamily
 }
index ef6e60287cdfd04acaf76d9ed6c1576933d7415c..1adb26b4524e04817bfaf5faf5f1b5eb4a302128 100644 (file)
@@ -27,8 +27,8 @@ tail:
        JBE     _5through8
        CMPL    BX, $16
        JBE     _9through16
-       TESTL   $0x4000000, runtime·cpuid_edx(SB) // check for sse2
-       JEQ     nosse2
+       CMPB    runtime·support_sse2(SB), $1
+       JNE     nosse2
        PXOR    X0, X0
        CMPL    BX, $32
        JBE     _17through32
index b712ea182aef51ea5f2dd486782f3a746e229850..e76201b48b45a0b9c7538d3156c07dbfb9e0ba42 100644 (file)
@@ -49,8 +49,8 @@ tail:
        JBE     move_5through8
        CMPL    BX, $16
        JBE     move_9through16
-       TESTL   $0x4000000, runtime·cpuid_edx(SB) // check for sse2
-       JEQ     nosse2
+       CMPB    runtime·support_sse2(SB), $1
+       JNE     nosse2
        CMPL    BX, $32
        JBE     move_17through32
        CMPL    BX, $64
@@ -71,8 +71,8 @@ nosse2:
  */
 forward:
        // If REP MOVSB isn't fast, don't use it
-       TESTL   $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
-       JEQ     fwdBy4
+       CMPB    runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
+       JNE     fwdBy4
 
        // Check alignment
        MOVL    SI, AX
index 510d0d694b49c89fae27a96bb351965406c54e98..21bf8e47e0f4e81555d6ecfa45031b4267a25c8b 100644 (file)
@@ -81,8 +81,8 @@ forward:
        JLS     move_256through2048
 
        // If REP MOVSB isn't fast, don't use it
-       TESTL   $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
-       JEQ     fwdBy8
+       CMPB    runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
+       JNE     fwdBy8
 
        // Check alignment
        MOVL    SI, AX
index 13360a9ad342ea29d54f6003b9eab22d54d2ad29..8c4d41d9289353846ba03b39c3b014c57cb32f27 100644 (file)
@@ -728,17 +728,29 @@ var (
 
        // Information about what cpu features are available.
        // Set on startup in asm_{386,amd64,amd64p32}.s.
-       cpuid_eax         uint32
-       cpuid_ecx         uint32
-       cpuid_edx         uint32
-       cpuid_ebx7        uint32 // not set on amd64p32
-       isIntel           bool
-       lfenceBeforeRdtsc bool
-       support_avx       bool
-       support_avx2      bool
-       support_bmi1      bool
-       support_bmi2      bool
-       support_popcnt    bool
+       // Packages outside the runtime should not use these
+       // as they are not an external api.
+       processorVersionInfo uint32
+       isIntel              bool
+       lfenceBeforeRdtsc    bool
+       support_aes          bool
+       support_avx          bool
+       support_avx2         bool
+       support_bmi1         bool
+       support_bmi2         bool
+       support_erms         bool
+       support_osxsave      bool
+       support_popcnt       bool
+       support_sse2         bool
+       support_sse41        bool
+       support_sse42        bool
+       support_ssse3        bool
+
+       // TODO(moehrmann) delete below variables once external
+       // packages have their dependencies on these removed.
+       cpuid_ecx  uint32
+       cpuid_edx  uint32
+       cpuid_ebx7 uint32 // not set on amd64p32
 
        goarm                uint8 // set by cmd/link on arm systems
        framepointer_enabled bool  // set by cmd/link