Size int // e.g. 128
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
+ FeatCheck string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
}
var (
{{end}}
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
{{end}}
+{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
+{{end}}
`))
)
memOpData.ArgsLoadAddr += " mask"
}
memOpData.ArgsAddr += " mem"
- memOpData.tplName = "vregMem"
+ if gOp.MemFeaturesData != nil {
+ _, feat2 := getVbcstData(*gOp.MemFeaturesData)
+ knownFeatChecks := map[string]string{
+ "AVX": "v.Block.CPUfeatures.hasFeature(CPUavx)",
+ "AVX2": "v.Block.CPUfeatures.hasFeature(CPUavx2)",
+ "AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
+ }
+ memOpData.FeatCheck = knownFeatChecks[feat2]
+ memOpData.tplName = "vregMemFeatCheck"
+ } else {
+ memOpData.tplName = "vregMem"
+ }
memOptData = append(memOptData, memOpData)
}
}
return nil
}
// getVbcstData parses a string of the form "feat1=<F1>;feat2=<F2>" and
// returns <F1> and <F2>.
//
// The string is split by hand instead of with fmt.Sscanf: Go's scanning verbs
// do not include C-style scan sets, so a format such as "%[^;]" is rejected
// with a "bad verb" error for every input.
//
// It panics with an error if s does not have the expected shape or if either
// value is empty.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	const (
		feat1Key = "feat1="
		feat2Key = ";feat2="
	)
	// Find the ';' that terminates the feat1 value. The search starts after
	// the feat1 key, so a hit guarantees s is at least len(feat1Key) long.
	sep := -1
	for i := len(feat1Key); i < len(s); i++ {
		if s[i] == ';' {
			sep = i
			break
		}
	}
	if sep < 0 ||
		s[:len(feat1Key)] != feat1Key ||
		len(s) < sep+len(feat2Key) ||
		s[sep:sep+len(feat2Key)] != feat2Key {
		panic(fmt.Errorf("malformed vbcst data %q: want \"feat1=<F1>;feat2=<F2>\"", s))
	}
	feat1Match = s[len(feat1Key):sep]
	feat2Match = s[sep+len(feat2Key):]
	if feat1Match == "" || feat2Match == "" {
		panic(fmt.Errorf("malformed vbcst data %q: empty feature name", s))
	}
	return feat1Match, feat2Match
}
+
// String implements fmt.Stringer; the text is whatever pprints produces for o.
func (o Operation) String() string {
	rendered := pprints(o)
	return rendered
}
// Should be paired with special templates in gen_simdrules.go
SpecialLower *string
- In []Operand // Parameters
- InVariant []Operand // Optional parameters
- Out []Operand // Results
- MemFeatures *string // The memory operand feature this operation supports
- Commutative bool // Commutativity
- CPUFeature string // CPUID/Has* feature name
- Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
- Documentation *string // Documentation will be appended to the stubs comments.
- AddDoc *string // Additional doc to be appended.
+ In []Operand // Parameters
+ InVariant []Operand // Optional parameters
+ Out []Operand // Results
+ MemFeatures *string // The memory operand feature this operation supports
+ MemFeaturesData *string // Additional data associated with MemFeatures
+ Commutative bool // Commutativity
+ CPUFeature string // CPUID/Has* feature name
+ Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
+ Documentation *string // Documentation will be appended to the stubs comments.
+ AddDoc *string // Additional doc to be appended.
// ConstImm is a hack to reduce the size of defs the user writes for const-immediate.
// If present, it will be copied to [In[0].Const].
ConstImm *string
feat1, ok1 := decodeCPUFeature(o.inst)
// Then check if there exist such an operation that for all vreg
// shapes they are the same at the same index
+ var feat1Match, feat2Match string
matchIdx := -1
+ var featMismatchCnt int
outer:
for i, m := range ms {
// Their CPU feature should match first
+ var featMismatch bool
feat2, ok2 := decodeCPUFeature(m.inst)
if !ok1 || !ok2 {
continue
}
if feat1 != feat2 {
- continue
+ featMismatch = true
+ featMismatchCnt++
}
if len(o.ops) == len(m.ops) {
for j := range o.ops {
}
// Found a match, break early
matchIdx = i
- break
+ feat1Match = feat1
+ feat2Match = feat2
+ if featMismatchCnt > 1 {
+ panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
+ }
+ if !featMismatch {
+ // Mismatch feat is ok but should prioritize matching cases.
+ break
+ }
}
}
// Remove the match from memOps, it's now merged to this pure vreg operation
// Merge is done by adding a new field
// Right now we only have vbcst
addFields["memFeatures"] = "vbcst"
+ if feat1Match != feat2Match {
+ addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
+ }
}
}
}