var X86 X86Features
{{range .}}
+{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
+{{- if ne .ImpliesAll ""}}
+//
+// If it returns true, then the CPU also supports {{.ImpliesAll}}.
+{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
-func (X86Features) {{.Feature}}() bool {
- return cpu.X86.Has{{.Feature}}
+func ({{.FeatureVar}}Features) {{.Feature}}() bool {
+{{- if .Virtual}}
+ return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
+{{- else}}
+ return cpu.{{.FeatureVar}}.Has{{.Feature}}
+{{- end}}
}
{{end}}
`
return buffer
}
+// goarchFeatures holds the CPU feature metadata registered for one GOARCH.
+type goarchFeatures struct {
+ // featureVar is the name of the exported feature-check variable for this
+ // architecture (for example "X86").
+ featureVar string
+
+ // features records per-feature information, keyed by feature name. A
+ // feature with no entry is treated as having the zero featureInfo.
+ features map[string]featureInfo
+}
+
+// featureInfo describes how a single CPU feature relates to other features.
+type featureInfo struct {
+ // Implies is a list of other CPU features that are required for this
+ // feature. These are allowed to chain: featureImplies follows them
+ // transitively.
+ //
+ // For example, if the Frob feature lists "Baz", then if X.Frob() returns
+ // true, it must also be true that the CPU has feature Baz.
+ Implies []string
+
+ // Virtual means this feature is not represented directly in internal/cpu,
+ // but is instead the logical AND of the features in Implies. The generated
+ // check for a virtual feature ANDs the Has* flag of each Implies entry.
+ Virtual bool
+}
+
+// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
+// about that feature. Not all features need to be in this map.
+var goarchFeatureInfo = make(map[string]goarchFeatures)
+
+// registerFeatureInfo records features as the feature metadata for goArch in
+// goarchFeatureInfo, replacing anything previously registered for that GOARCH.
+func registerFeatureInfo(goArch string, features goarchFeatures) {
+ goarchFeatureInfo[goArch] = features
+}
+
+// featureImplies returns a comment-ready English list of every feature that
+// base transitively implies on goarch, according to goarchFeatureInfo.
+//
+// The result is "" if base implies nothing (or is not registered), "A" for a
+// single feature, "A and B" for two, and "A, B, and C" for three or more.
+// base itself is never listed, and each implied feature is listed at most
+// once even if it is reachable through several Implies paths. In an acyclic
+// table a feature always appears after the features it implies, e.g.
+// "AVX, AVX2, and AVX512".
+func featureImplies(goarch string, base string) string {
+	info := goarchFeatureInfo[goarch].features
+
+	// Walk the Implies graph depth-first, emitting each feature after its own
+	// dependencies. Marking a feature as seen before descending into it keeps
+	// the walk finite even if the table contains a cycle, and prevents
+	// duplicates when two features share a dependency.
+	seen := map[string]bool{base: true}
+	var list []string
+	var visit func(f string)
+	visit = func(f string) {
+		deps := info[f].Implies
+		// Visit siblings last-to-first so that fan-outs keep the order used
+		// in the generated docs (Implies {"AVX", "AES"} reads "AES and AVX").
+		for i := len(deps) - 1; i >= 0; i-- {
+			dep := deps[i]
+			if seen[dep] {
+				continue
+			}
+			seen[dep] = true
+			visit(dep)
+			list = append(list, dep)
+		}
+	}
+	visit(base)
+
+	// Combine into a comment-ready form.
+	switch len(list) {
+	case 0:
+		return ""
+	case 1:
+		return list[0]
+	case 2:
+		return list[0] + " and " + list[1]
+	default:
+		list[len(list)-1] = "and " + list[len(list)-1]
+		return strings.Join(list, ", ")
+	}
+}
+
func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
// Gather all features
type featureKey struct {
featureSet[featureKey{op.GoArch, feature}] = struct{}{}
}
}
- features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
+ featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
return c
}
return compareNatural(a.Feature, b.Feature)
})
+ // TODO: internal/cpu doesn't enforce these at all. You can even do
+ // GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
+ // AVX2. We need to push these dependencies into it somehow.
+ type feature struct {
+ featureKey
+ FeatureVar string
+ Virtual bool
+ Implies []string
+ ImpliesAll string
+ }
+ var features []feature
+ for _, k := range featureKeys {
+ featureVar := goarchFeatureInfo[k.GoArch].featureVar
+ fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
+ features = append(features, feature{
+ featureKey: k,
+ FeatureVar: featureVar,
+ Virtual: fi.Virtual,
+ Implies: fi.Implies,
+ ImpliesAll: featureImplies(k.GoArch, k.Feature),
+ })
+ }
+
// If we ever have the same feature name on more than one GOARCH, we'll have
// to be more careful about this.
t := templateOf(simdFeaturesTemplate, "features")
package main
import (
- "cmp"
"fmt"
"log"
"maps"
}
log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
} else {
- keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
- return cmp.Or(cmp.Compare(a.Extension, b.Extension),
- cmp.Compare(a.ISASet, b.ISASet))
- })
+ keys := slices.Sorted(maps.Keys(unknownFeatures))
for _, key := range keys {
- if key.ISASet == "" || key.ISASet == key.Extension {
- log.Printf("unhandled Extension %s", key.Extension)
- } else {
- log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
- }
+ log.Printf("unhandled ISASet %s", key)
log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
}
}
// decodeCPUFeature returns the CPU feature name required by inst. These match
// the names of the "Has*" feature checks in the simd package.
+//
+// The lookup key is inst's ISA set, falling back to its extension when the ISA
+// set is empty. If that key has no entry in cpuFeatureMap, the instruction's
+// opcode is recorded in unknownFeatures and decodeCPUFeature returns "", false.
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
- key := cpuFeatureKey{
- Extension: inst.Extension,
- ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
+ isaSet := inst.ISASet
+ if isaSet == "" {
+ // Older instructions don't have an ISA set. Use their "extension"
+ // instead.
+ isaSet = inst.Extension
+ }
+ // We require AVX512VL to use AVX512 at all, so strip off the vector length
+ // suffixes.
+ if strings.HasPrefix(isaSet, "AVX512") {
+ isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
}
- feat, ok := cpuFeatureMap[key]
+
+ feat, ok := cpuFeatureMap[isaSet]
if !ok {
- imap := unknownFeatures[key]
+ imap := unknownFeatures[isaSet]
if imap == nil {
imap = make(map[string]struct{})
- unknownFeatures[key] = imap
+ unknownFeatures[isaSet] = imap
}
imap[inst.Opcode()] = struct{}{}
return "", false
return feat, true
}
-var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")
-
-type cpuFeatureKey struct {
- Extension, ISASet string
-}
-
-// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name
-// that can be used in the SIMD API.
-var cpuFeatureMap = map[cpuFeatureKey]string{
- {"SHA", "SHA"}: "SHA",
+var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
- {"AVX", ""}: "AVX",
- {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
- {"AVX2", ""}: "AVX2",
- {"AVXAES", ""}: "AVX, AES",
+// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
+// name to expose in the SIMD feature check API.
+//
+// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
+var cpuFeatureMap = map[string]string{
+ "AVX": "AVX",
+ "AVX_VNNI": "AVXVNNI",
+ "AVX2": "AVX2",
+ "AVXAES": "AVXAES",
+ "SHA": "SHA",
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
- {"AVX512EVEX", "AVX512F"}: "AVX512",
- {"AVX512EVEX", "AVX512CD"}: "AVX512",
- {"AVX512EVEX", "AVX512BW"}: "AVX512",
- {"AVX512EVEX", "AVX512DQ"}: "AVX512",
- // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by
- // the vector length suffix.
+ "AVX512F": "AVX512",
+ "AVX512BW": "AVX512",
+ "AVX512CD": "AVX512",
+ "AVX512DQ": "AVX512",
+ // AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
+ // required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.
// AVX-512 extension features
- {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
- {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
- {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
- {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
- {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
- {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
- {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
- {"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
+ "AVX512_BITALG": "AVX512BITALG",
+ "AVX512_GFNI": "AVX512GFNI",
+ "AVX512_VBMI": "AVX512VBMI",
+ "AVX512_VBMI2": "AVX512VBMI2",
+ "AVX512_VNNI": "AVX512VNNI",
+ "AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ",
+ "AVX512_VAES": "AVX512VAES",
+ "AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",
// AVX 10.2 (not yet supported)
- {"AVX512EVEX", "AVX10_2_RC"}: "ignore",
+ // NOTE(review): "ignore" is presumably a sentinel that consumers of
+ // decodeCPUFeature skip, not a real feature name — confirm against the
+ // caller.
+ "AVX10_2_RC": "ignore",
+}
+
+// init registers the amd64 feature table: the feature-check variable name
+// ("X86") and, per feature, which other features it implies and whether it is
+// virtual.
+func init() {
+ // TODO: In general, Intel doesn't make any guarantees about what flags are
+ // set, so this means our feature checks need to ensure these, just to be
+ // sure.
+ var features = map[string]featureInfo{
+ "AVX2": {Implies: []string{"AVX"}},
+ "AVX512": {Implies: []string{"AVX2"}},
+
+ "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
+
+ // AVX-512 subfeatures.
+ //
+ // NOTE(review): cpuFeatureMap also produces "AVX512VPCLMULQDQ", which has
+ // no entry here, so its generated doc lists no implied features —
+ // confirm whether that omission is intentional.
+ "AVX512BITALG": {Implies: []string{"AVX512"}},
+ "AVX512GFNI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI2": {Implies: []string{"AVX512"}},
+ "AVX512VNNI": {Implies: []string{"AVX512"}},
+ "AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
+ "AVX512VAES": {Implies: []string{"AVX512"}},
+
+ // AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
+ // instructions to VEX encoding, limited to 256 bit vectors. They're
+ // intended for lower end CPUs that want to support VNNI/IFMA without
+ // supporting AVX-512. As such, they're built on AVX2's VEX encoding.
+ "AVXVNNI": {Implies: []string{"AVX2"}},
+ "AVXIFMA": {Implies: []string{"AVX2"}},
+ }
+ registerFeatureInfo("amd64", goarchFeatures{
+ featureVar: "X86",
+ features: features,
+ })
}
-var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}
+var unknownFeatures = map[string]map[string]struct{}{}
// hasOptionalMask returns whether there is an optional mask operand in ops.
func hasOptionalMask(ops []operand) bool {
var X86 X86Features
-// AES returns whether the CPU supports the AES feature.
-//
-// AES is defined on all GOARCHes, but will only return true on
-// GOARCH amd64.
-func (X86Features) AES() bool {
- return cpu.X86.HasAES
-}
-
// AVX returns whether the CPU supports the AVX feature.
//
// AVX is defined on all GOARCHes, but will only return true on
// AVX2 returns whether the CPU supports the AVX2 feature.
//
+// If it returns true, then the CPU also supports AVX.
+//
// AVX2 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX2() bool {
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
//
+// If it returns true, then the CPU also supports AVX and AVX2.
+//
// AVX512 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512() bool {
// AVX512BITALG returns whether the CPU supports the AVX512BITALG feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512BITALG is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512BITALG() bool {
// AVX512GFNI returns whether the CPU supports the AVX512GFNI feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512GFNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512GFNI() bool {
// AVX512VAES returns whether the CPU supports the AVX512VAES feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512VAES is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VAES() bool {
// AVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512VBMI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VBMI() bool {
// AVX512VBMI2 returns whether the CPU supports the AVX512VBMI2 feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512VBMI2 is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VBMI2() bool {
// AVX512VNNI returns whether the CPU supports the AVX512VNNI feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512VNNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VNNI() bool {
// AVX512VPOPCNTDQ returns whether the CPU supports the AVX512VPOPCNTDQ feature.
//
+// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
+//
// AVX512VPOPCNTDQ is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VPOPCNTDQ() bool {
return cpu.X86.HasAVX512VPOPCNTDQ
}
+// AVXAES returns whether the CPU supports the AVXAES feature.
+//
+// If it returns true, then the CPU also supports AES and AVX.
+//
+// AVXAES is defined on all GOARCHes, but will only return true on
+// GOARCH amd64.
+func (X86Features) AVXAES() bool {
+ return cpu.X86.HasAVX && cpu.X86.HasAES
+}
+
// AVXVNNI returns whether the CPU supports the AVXVNNI feature.
//
+// If it returns true, then the CPU also supports AVX and AVX2.
+//
// AVXVNNI is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVXVNNI() bool {
// y is the chunk of dw array in use.
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
//
-// Asm: VAESDECLAST, CPU Feature: AVX, AES
+// Asm: VAESDECLAST, CPU Feature: AVXAES
func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// y is the chunk of dw array in use.
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
//
-// Asm: VAESDEC, CPU Feature: AVX, AES
+// Asm: VAESDEC, CPU Feature: AVXAES
func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// y is the chunk of w array in use.
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
//
-// Asm: VAESENCLAST, CPU Feature: AVX, AES
+// Asm: VAESENCLAST, CPU Feature: AVXAES
func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// y is the chunk of w array in use.
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
//
-// Asm: VAESENC, CPU Feature: AVX, AES
+// Asm: VAESENC, CPU Feature: AVXAES
func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
// x is the chunk of w array in use.
// result = InvMixColumns(x)
//
-// Asm: VAESIMC, CPU Feature: AVX, AES
+// Asm: VAESIMC, CPU Feature: AVXAES
func (x Uint32x4) AESInvMixColumns() Uint32x4
/* AESRoundKeyGenAssist */
//
// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
-// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
+// Asm: VAESKEYGENASSIST, CPU Feature: AVXAES
func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
/* Abs */