]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] simd/_gen: add mem peephole with feat mismatches
authorJunyang Shao <shaojunyang@google.com>
Mon, 13 Oct 2025 18:30:05 +0000 (18:30 +0000)
committerJunyang Shao <shaojunyang@google.com>
Mon, 13 Oct 2025 20:04:47 +0000 (13:04 -0700)
This CL attempts to add peepholes for Op -> Opload where the Opload has
a different CPU feature than Op. However, the new simdgen changes don't
do anything yet, because no such peepholes currently exist.

Change-Id: I20c3e4b43bb7414c3a309d77786218372ca1b5b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/711380
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/simd/_gen/simdgen/gen_simdrules.go
src/simd/_gen/simdgen/gen_utility.go
src/simd/_gen/simdgen/godefs.go
src/simd/_gen/simdgen/xed.go

index 2339a1910d76d4a297722886f5c1c10184b68c3b..059a2a4f365bb75fc9e3ee59c9e9e260804a2e96 100644 (file)
@@ -25,6 +25,7 @@ type tplRuleData struct {
        Size           int    // e.g. 128
        ArgsLoadAddr   string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
        ArgsAddr       string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
+       FeatCheck      string // e.g. "v.Block.CPUfeatures.hasFeature(CPUavx512)" -- for a ssa/_gen rules file.
 }
 
 var (
@@ -43,6 +44,8 @@ var (
 {{end}}
 {{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
 {{end}}
+{{define "vregMemFeatCheck"}}({{.Asm}} {{.ArgsLoadAddr}}) && {{.FeatCheck}} && canMergeLoad(v, l) && clobber(l)=> ({{.Asm}}load {{.ArgsAddr}})
+{{end}}
 `))
 )
 
@@ -277,7 +280,18 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
                                        memOpData.ArgsLoadAddr += " mask"
                                }
                                memOpData.ArgsAddr += " mem"
-                               memOpData.tplName = "vregMem"
+                               if gOp.MemFeaturesData != nil {
+                                       _, feat2 := getVbcstData(*gOp.MemFeaturesData)
+                                       knownFeatChecks := map[string]string{
+                                               "AVX":    "v.Block.CPUfeatures.hasFeature(CPUavx)",
+                                               "AVX2":   "v.Block.CPUfeatures.hasFeature(CPUavx2)",
+                                               "AVX512": "v.Block.CPUfeatures.hasFeature(CPUavx512)",
+                                       }
+                                       memOpData.FeatCheck = knownFeatChecks[feat2]
+                                       memOpData.tplName = "vregMemFeatCheck"
+                               } else {
+                                       memOpData.tplName = "vregMem"
+                               }
                                memOptData = append(memOptData, memOpData)
                        }
                }
index 78a214783b1937c169d004e4ca6f5438d7824b15..70f07cf7a49fd5e20b545bb0cb319bdd52997cd9 100644 (file)
@@ -800,6 +800,14 @@ func reportXEDInconsistency(ops []Operation) error {
        return nil
 }
 
// getVbcstData parses a memFeaturesData string of the form
// "feat1=<feature>;feat2=<feature>" and returns the two feature names.
//
// The string is split by hand rather than with fmt.Sscanf: Go's fmt package
// does not implement C-style scansets such as "%[^;]", so a Sscanf call using
// that verb fails with "bad verb" on every input, including well-formed ones.
//
// It panics with an error if s is not in the expected form or if either
// feature name is empty.
func getVbcstData(s string) (feat1Match, feat2Match string) {
	const (
		feat1Key = "feat1="
		feat2Key = ";feat2="
	)
	fail := func() {
		panic(fmt.Errorf("malformed memFeaturesData %q: want \"feat1=<feature>;feat2=<feature>\"", s))
	}

	if len(s) < len(feat1Key) || s[:len(feat1Key)] != feat1Key {
		fail()
	}
	rest := s[len(feat1Key):]

	// The first feature runs up to (not including) the first ';'.
	sep := 0
	for sep < len(rest) && rest[sep] != ';' {
		sep++
	}
	feat1Match = rest[:sep]
	tail := rest[sep:]

	// tail must be ";feat2=" followed by a non-empty feature name.
	if feat1Match == "" || len(tail) <= len(feat2Key) || tail[:len(feat2Key)] != feat2Key {
		fail()
	}
	feat2Match = tail[len(feat2Key):]
	return feat1Match, feat2Match
}
+
 func (o Operation) String() string {
        return pprints(o)
 }
index f9a2caaca30d7cac9e42456be2083c4488217912..bda1dfc8fec3cf850d36ca99a32a5ee00824ec79 100644 (file)
@@ -52,15 +52,16 @@ type rawOperation struct {
        // Should be paired with special templates in gen_simdrules.go
        SpecialLower *string
 
-       In            []Operand // Parameters
-       InVariant     []Operand // Optional parameters
-       Out           []Operand // Results
-       MemFeatures   *string   // The memory operand feature this operation supports
-       Commutative   bool      // Commutativity
-       CPUFeature    string    // CPUID/Has* feature name
-       Zeroing       *bool     // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
-       Documentation *string   // Documentation will be appended to the stubs comments.
-       AddDoc        *string   // Additional doc to be appended.
+       In              []Operand // Parameters
+       InVariant       []Operand // Optional parameters
+       Out             []Operand // Results
+       MemFeatures     *string   // The memory operand feature this operation supports
+       MemFeaturesData *string   // Additional data associated with MemFeatures
+       Commutative     bool      // Commutativity
+       CPUFeature      string    // CPUID/Has* feature name
+       Zeroing         *bool     // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
+       Documentation   *string   // Documentation will be appended to the stubs comments.
+       AddDoc          *string   // Additional doc to be appended.
        // ConstMask is a hack to reduce the size of defs the user writes for const-immediate
        // If present, it will be copied to [In[0].Const].
        ConstImm *string
index 1781f5c74d03a2ae683b849110e370f66f48899d..c3eb4780be0069910cd9277cdf7096de11ce0e10 100644 (file)
@@ -125,16 +125,20 @@ func loadXED(xedPath string) []*unify.Value {
                                        feat1, ok1 := decodeCPUFeature(o.inst)
                                        // Then check if there exist such an operation that for all vreg
                                        // shapes they are the same at the same index
+                                       var feat1Match, feat2Match string
                                        matchIdx := -1
+                                       var featMismatchCnt int
                                outer:
                                        for i, m := range ms {
                                                // Their CPU feature should match first
+                                               var featMismatch bool
                                                feat2, ok2 := decodeCPUFeature(m.inst)
                                                if !ok1 || !ok2 {
                                                        continue
                                                }
                                                if feat1 != feat2 {
-                                                       continue
+                                                       featMismatch = true
+                                                       featMismatchCnt++
                                                }
                                                if len(o.ops) == len(m.ops) {
                                                        for j := range o.ops {
@@ -160,7 +164,15 @@ func loadXED(xedPath string) []*unify.Value {
                                                        }
                                                        // Found a match, break early
                                                        matchIdx = i
-                                                       break
+                                                       feat1Match = feat1
+                                                       feat2Match = feat2
+                                                       if featMismatchCnt > 1 {
+                                                               panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
+                                                       }
+                                                       if !featMismatch {
+                                                               // Mismatch feat is ok but should prioritize matching cases.
+                                                               break
+                                                       }
                                                }
                                        }
                                        // Remove the match from memOps, it's now merged to this pure vreg operation
@@ -169,6 +181,9 @@ func loadXED(xedPath string) []*unify.Value {
                                                // Merge is done by adding a new field
                                                // Right now we only have vbcst
                                                addFields["memFeatures"] = "vbcst"
+                                               if feat1Match != feat2Match {
+                                                       addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
+                                               }
                                        }
                                }
                        }