]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: track which CPU features are in scope
authorDavid Chase <drchase@google.com>
Fri, 5 Sep 2025 23:05:18 +0000 (19:05 -0400)
committerDavid Chase <drchase@google.com>
Tue, 7 Oct 2025 21:06:28 +0000 (14:06 -0700)
analysis for

- is this block only reached through feature checks?
- does the function signature imply AVX-something?
- is there an instruction in this block which implies AVX-something?

and keep track of which features those are.  Features =
AVX, AVX2, AVX512, etc.

Has a test.

Change-Id: I0b6f2e87d01ec587818db11cf71fac1e4d500650
Reviewed-on: https://go-review.googlesource.com/c/go/+/706337
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
src/cmd/compile/internal/ssa/block.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/cpufeatures.go [new file with mode: 0644]
src/cmd/compile/internal/ssa/sizeof_test.go
src/cmd/compile/internal/types/type.go
test/simd.go [new file with mode: 0644]

index 1240bfd6556a1108a2717cb72726097bebdcab01..f457e66f16e1a96533a93f84a45626e925fd0c3e 100644 (file)
@@ -18,6 +18,9 @@ type Block struct {
        // Source position for block's control operation
        Pos src.XPos
 
+       // What cpu features (AVXnnn, SVEyyy) are implied to reach/execute this block?
+       CPUfeatures CPUfeatures
+
        // The kind of block this is.
        Kind BlockKind
 
@@ -449,3 +452,53 @@ const (
        HotPgoInitial          = HotPgo | HotInitial                // special case; single block loop, initial block is header block has a flow-in entry, but PGO says it is hot
        HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
 )
+
+// CPUfeatures is a bit set of CPU features; each named feature in the
+// constant group below occupies one bit.
+type CPUfeatures uint32
+
+const (
+       // CPUNone is the empty set and CPUAll is the set with every bit set.
+       CPUNone CPUfeatures = 0
+       CPUAll  CPUfeatures = ^CPUfeatures(0)
+       // iota is already 2 on this line (it counts every ConstSpec in the
+       // group, including CPUNone and CPUAll), so CPUavx is 1<<2 and the
+       // two lowest bits are not assigned to any feature.
+       CPUavx  CPUfeatures = 1 << iota
+       CPUavx2
+       CPUavxvnni
+       CPUavx512
+       CPUbitalg
+       CPUgfni
+       CPUvbmi
+       CPUvbmi2
+       CPUvpopcntdq
+       CPUavx512vnni
+
+       CPUneon
+       CPUsve2
+)
+
+func (f CPUfeatures) String() string {
+       if f == CPUNone {
+               return "none"
+       }
+       if f == CPUAll {
+               return "all"
+       }
+       s := ""
+       foo := func(what string, feat CPUfeatures) {
+               if feat&f != 0 {
+                       if s != "" {
+                               s += "+"
+                       }
+                       s += what
+               }
+       }
+       foo("avx", CPUavx)
+       foo("avx2", CPUavx2)
+       foo("avx512", CPUavx512)
+       foo("avxvnni", CPUavxvnni)
+       foo("bitalg", CPUbitalg)
+       foo("gfni", CPUgfni)
+       foo("vbmi", CPUvbmi)
+       foo("vbmi2", CPUvbmi2)
+       foo("popcntdq", CPUvpopcntdq)
+       foo("avx512vnni", CPUavx512vnni)
+
+       return s
+}
index 1f47362583353e69fa95e82aa98f91fcdb0cd7ce..be1a6f158e677d3b685e548e3a7e375a26d58132 100644 (file)
@@ -485,6 +485,7 @@ var passes = [...]pass{
        {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
        {name: "insert resched checks", fn: insertLoopReschedChecks,
                disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
+       {name: "cpufeatures", fn: cpufeatures, required: buildcfg.Experiment.SIMD, disabled: !buildcfg.Experiment.SIMD},
        {name: "lower", fn: lower, required: true},
        {name: "addressing modes", fn: addressingModes, required: false},
        {name: "late lower", fn: lateLower, required: true},
@@ -587,6 +588,8 @@ var passOrder = [...]constraint{
        {"branchelim", "late opt"},
        // branchelim is an arch-independent pass.
        {"branchelim", "lower"},
+       // lower needs cpu feature information (for SIMD)
+       {"cpufeatures", "lower"},
 }
 
 func init() {
diff --git a/src/cmd/compile/internal/ssa/cpufeatures.go b/src/cmd/compile/internal/ssa/cpufeatures.go
new file mode 100644 (file)
index 0000000..77b1db5
--- /dev/null
@@ -0,0 +1,261 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+       "cmd/compile/internal/types"
+       "cmd/internal/obj"
+       "fmt"
+       "internal/goarch"
+)
+
+// localEffect records the CPU feature sets computed for a single block
+// by the cpufeatures pass. For blocks other than BlockIf both entries of
+// end hold the same set and only entry 0 is consulted.
+type localEffect struct {
+       start    CPUfeatures    // features present at beginning of block
+       internal CPUfeatures    // features implied by execution of block
+       end      [2]CPUfeatures // for BlockIf, features present on outgoing edges
+       visited  bool           // On the first iteration this will be false for backedges.
+}
+
+// String renders all fields of e as a single line of text.
+func (e localEffect) String() string {
+       const format = "visited=%v, start=%v, internal=%v, end[0]=%v, end[1]=%v"
+       end0, end1 := e.end[0], e.end[1]
+       return fmt.Sprintf(format, e.visited, e.start, e.internal, end0, end1)
+}
+
+// ifEffect pattern matches for a BlockIf conditional on a load
+// of a field from internal/cpu.X86 and returns the corresponding
+// effect.
+func ifEffect(b *Block) (features CPUfeatures, taken int) {
+       // TODO generalize for other architectures.
+       if b.Kind != BlockIf {
+               return
+       }
+       c := b.Controls[0]
+
+       if c.Op == OpNot {
+               taken = 1
+               c = c.Args[0]
+       }
+       if c.Op != OpLoad {
+               return
+       }
+       offPtr := c.Args[0]
+       if offPtr.Op != OpOffPtr {
+               return
+       }
+       addr := offPtr.Args[0]
+       if addr.Op != OpAddr || addr.Args[0].Op != OpSB {
+               return
+       }
+       sym := addr.Aux.(*obj.LSym)
+       if sym.Name != "internal/cpu.X86" {
+               return
+       }
+       o := offPtr.AuxInt
+       t := addr.Type
+       if !t.IsPtr() {
+               b.Func.Fatalf("The symbol %s is not a pointer, found %v instead", sym.Name, t)
+       }
+       t = t.Elem()
+       if !t.IsStruct() {
+               b.Func.Fatalf("The referent of symbol %s is not a struct, found %v instead", sym.Name, t)
+       }
+       match := ""
+       for _, f := range t.Fields() {
+               if o == f.Offset && f.Sym != nil {
+                       match = f.Sym.Name
+                       break
+               }
+       }
+
+       switch match {
+
+       case "HasAVX":
+               features = CPUavx
+       case "HasAVXVNNI":
+               features = CPUavx | CPUavxvnni
+       case "HasAVX2":
+               features = CPUavx2 | CPUavx
+
+               // Compiler currently treats these all alike.
+       case "HasAVX512", "HasAVX512F", "HasAVX512CD", "HasAVX512BW",
+               "HasAVX512DQ", "HasAVX512VL", "HasAVX512VPCLMULQDQ":
+               features = CPUavx512 | CPUavx2 | CPUavx
+
+       case "HasAVX512GFNI":
+               features = CPUavx512 | CPUgfni | CPUavx2 | CPUavx
+       case "HasAVX512VNNI":
+               features = CPUavx512 | CPUavx512vnni | CPUavx2 | CPUavx
+       case "HasAVX512VBMI":
+               features = CPUavx512 | CPUvbmi | CPUavx2 | CPUavx
+       case "HasAVX512VBMI2":
+               features = CPUavx512 | CPUvbmi2 | CPUavx2 | CPUavx
+       case "HasAVX512BITALG":
+               features = CPUavx512 | CPUbitalg | CPUavx2 | CPUavx
+       case "HasAVX512VPOPCNTDQ":
+               features = CPUavx512 | CPUvpopcntdq | CPUavx2 | CPUavx
+
+       case "HasBMI1":
+               features = CPUvbmi
+       case "HasBMI2":
+               features = CPUvbmi2
+
+               // Features that are not currently interesting to the compiler.
+       case "HasAES", "HasADX", "HasERMS", "HasFSRM", "HasFMA", "HasGFNI", "HasOSXSAVE",
+               "HasPCLMULQDQ", "HasPOPCNT", "HasRDTSCP", "HasSHA",
+               "HasSSE3", "HasSSSE3", "HasSSE41", "HasSSE42":
+
+       }
+       if b.Func.pass.debug > 2 {
+               b.Func.Warnl(b.Pos, "%s, block b%v has features offset %d, match is %s, features is %v", b.Func.Name, b.ID, o, match, features)
+       }
+       return
+}
+
+// cpufeatures computes, for every block of f, the set of CPU features
+// that may be assumed while the block executes and stores it in the
+// block's CPUfeatures field. The set for a block is the intersection of
+// the sets on its incoming edges (for the entry block: the features
+// implied by SIMD types among the receiver, parameters and results),
+// plus the features implied by the types of the values in the block.
+// An edge leaving a BlockIf that tests a field of internal/cpu.X86
+// additionally carries the features implied by that test.
+//
+// Only AMD64 is handled; on any other architecture the pass does nothing.
+func cpufeatures(f *Func) {
+       arch := f.Config.Ctxt().Arch.Family
+       // TODO there are other SIMD architectures
+       if arch != goarch.AMD64 {
+               return
+       }
+
+       po := f.Postorder()
+
+       // effects is indexed by block ID.
+       effects := make([]localEffect, 1+f.NumBlocks())
+
+       // features returns the CPU features implied by a value of type t:
+       // AVX for 16- and 32-byte SIMD types, AVX512 (with AVX2 and AVX) for
+       // 64-byte SIMD types, and none for every other type.
+       features := func(t *types.Type) CPUfeatures {
+               if t.IsSIMD() {
+                       switch t.Size() {
+                       case 16, 32:
+                               return CPUavx
+                       case 64:
+                               return CPUavx512 | CPUavx2 | CPUavx
+                       }
+               }
+               return CPUNone
+       }
+
+       // visit blocks in reverse post order
+       // when b is visited, all of its predecessors (except for loop back edges)
+       // will have been visited
+       for i := len(po) - 1; i >= 0; i-- {
+               b := po[i]
+
+               var feat CPUfeatures
+
+               if b == f.Entry {
+                       // Check the types of inputs and outputs, as well as annotations.
+                       // Start with none and union all that is implied by all the types seen.
+                       if f.Type != nil { // a problem for SSA tests
+                               for _, field := range f.Type.RecvParamsResults() {
+                                       feat |= features(field.Type)
+                               }
+                       }
+
+               } else {
+                       // Start with all and intersect over predecessors
+                       feat = CPUAll
+                       for _, p := range b.Preds {
+                               pb := p.Block()
+                               if !effects[pb.ID].visited {
+                                       // Not visited yet, so this is a back edge; it is
+                                       // taken into account by the fixed-point loop below.
+                                       continue
+                               }
+                               pi := p.Index()
+                               if pb.Kind != BlockIf {
+                                       // Only BlockIf distinguishes its outgoing edges; for
+                                       // every other kind both entries of end are equal, and
+                                       // using entry 0 keeps the index within the array.
+                                       pi = 0
+                               }
+
+                               feat &= effects[pb.ID].end[pi]
+                       }
+               }
+
+               e := localEffect{start: feat, visited: true}
+
+               // Separately capture the internal effects of this block
+               var internal CPUfeatures
+               for _, v := range b.Values {
+                       // the rule applied here is, if the block contains any
+                       // instruction that would fault if the feature (avx, avx512)
+                       // were not present, then assume that the feature is present
+                       // for all the instructions in the block, a fault is a fault.
+                       t := v.Type
+                       if t.IsResults() {
+                               for i := 0; i < t.NumFields(); i++ {
+                                       // Accumulate into internal, not feat, so that these
+                                       // features are recorded in e.internal and are not
+                                       // lost when the fixed-point loop below recomputes
+                                       // this block from e.internal.
+                                       internal |= features(t.FieldType(i))
+                               }
+                       } else {
+                               internal |= features(t)
+                       }
+               }
+               e.internal = internal
+               feat |= internal
+
+               branchEffect, taken := ifEffect(b)
+               e.end = [2]CPUfeatures{feat, feat}
+               e.end[taken] |= branchEffect
+
+               effects[b.ID] = e
+               if f.pass.debug > 1 && feat != CPUNone {
+                       f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, feat)
+               }
+
+               b.CPUfeatures = feat
+       }
+
+       // If the flow graph is irreducible, things can still change on backedges.
+       change := true
+       for change {
+               change = false
+               for i := len(po) - 1; i >= 0; i-- {
+                       b := po[i]
+
+                       if b == f.Entry {
+                               continue // cannot change
+                       }
+                       // Every block has been visited by now, so intersect over
+                       // all predecessors, back edges included.
+                       feat := CPUAll
+                       for _, p := range b.Preds {
+                               pb := p.Block()
+                               pi := p.Index()
+                               if pb.Kind != BlockIf {
+                                       pi = 0
+                               }
+                               feat &= effects[pb.ID].end[pi]
+                       }
+                       e := effects[b.ID]
+                       if feat == e.start {
+                               continue
+                       }
+                       e.start = feat
+                       effects[b.ID] = e
+                       // uh-oh, something changed
+                       if f.pass.debug > 1 {
+                               f.Warnl(b.Pos, "%s, block b%v saw predecessor feature change", b.Func.Name, b.ID)
+                       }
+
+                       feat |= e.internal
+                       // e.end[0]&e.end[1] is the block's previous feature set
+                       // without the branch effect, which is added to one edge only.
+                       if feat == e.end[0]&e.end[1] {
+                               continue
+                       }
+
+                       branchEffect, taken := ifEffect(b)
+                       e.end = [2]CPUfeatures{feat, feat}
+                       e.end[taken] |= branchEffect
+
+                       effects[b.ID] = e
+                       b.CPUfeatures = feat
+                       if f.pass.debug > 1 {
+                               f.Warnl(b.Pos, "%s, block b%v has new features %v", b.Func.Name, b.ID, feat)
+                       }
+                       change = true
+               }
+       }
+       if f.pass.debug > 0 {
+               for _, b := range f.Blocks {
+                       if b.CPUfeatures != CPUNone {
+                               f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, b.CPUfeatures)
+                       }
+
+               }
+       }
+}
index a27002ee3ac3b26a557decab61d33de604665398..9a58197925ce138117cc58aea81d91f5e7ba137d 100644 (file)
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
                _64bit uintptr     // size on 64bit platforms
        }{
                {Value{}, 72, 112},
-               {Block{}, 164, 304},
+               {Block{}, 168, 312},
                {LocalSlot{}, 28, 40},
                {valState{}, 28, 40},
        }
index 652d4362ce7b47fe4acc843c5811ce2c522d0217..fc2c0435bdfa1aeb5aa6545c43354fbf63e7db29 100644 (file)
@@ -989,6 +989,7 @@ func (t *Type) ArgWidth() int64 {
        return t.extra.(*Func).Argwid
 }
 
+// Size returns the width of t in bytes.
 func (t *Type) Size() int64 {
        if t.kind == TSSA {
                return t.width
@@ -997,6 +998,7 @@ func (t *Type) Size() int64 {
        return t.width
 }
 
+// Alignment returns the alignment of t in bytes.
 func (t *Type) Alignment() int64 {
        CalcSize(t)
        return int64(t.align)
diff --git a/test/simd.go b/test/simd.go
new file mode 100644 (file)
index 0000000..b1695fa
--- /dev/null
@@ -0,0 +1,97 @@
+// errorcheck -0 -d=ssa/cpufeatures/debug=1
+
+//go:build goexperiment.simd && amd64
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package foo
+
+import "simd"
+
+// f1 has a simd.Int8x16 parameter; the expected diagnostic on its
+// return line reports the avx feature.
+func f1(x simd.Int8x16) {
+       return // ERROR "has features avx"
+}
+
+// g1 has a simd.Int8x16 result; the expected diagnostic is exactly
+// "avx" (the pattern is anchored with $).
+func g1() simd.Int8x16 {
+       var x simd.Int8x16
+       return x // ERROR "has features avx$"
+}
+
+// T1 is a named type whose underlying type is simd.Int8x16.
+type T1 simd.Int8x16
+
+// h has a T1 receiver and no other parameters or results; the expected
+// diagnostic is exactly "avx".
+func (x T1) h() {
+       return // ERROR "has features avx$"
+}
+
+// f2 has a simd.Int8x64 parameter; the expected diagnostic reports
+// avx, avx2 and avx512.
+func f2(x simd.Int8x64) {
+       return // ERROR "has features avx[+]avx2[+]avx512$"
+}
+
+// g2 has a simd.Int8x64 result; the expected diagnostic reports
+// avx, avx2 and avx512.
+func g2() simd.Int8x64 {
+       var x simd.Int8x64
+       return x // ERROR "has features avx[+]avx2[+]avx512$"
+}
+
+// T2 is a named type whose underlying type is simd.Int8x64.
+type T2 simd.Int8x64
+
+// h has a T2 receiver and no other parameters or results; the expected
+// diagnostic reports avx, avx2 and avx512.
+func (x T2) h() {
+       return // ERROR "has features avx[+]avx2[+]avx512$"
+}
+
+var a int
+
+// f returns early unless simd.HasAVX512() holds on one branch of the
+// test of a, and unless simd.HasAVX2() holds on the other. The expected
+// diagnostics report avx+avx2+avx512 after the first check, avx+avx2
+// after the second, and avx+avx2 at the closing brace (presumably for
+// the code following the point where the two branches rejoin).
+func f() {
+       if a == 0 {
+               if !simd.HasAVX512() {
+                       return
+               }
+               println("has avx512") // ERROR "has features avx[+]avx2[+]avx512$"
+       } else {
+               if !simd.HasAVX2() {
+                       return
+               }
+               println("has avx2") // ERROR "has features avx[+]avx2$"
+       }
+       println("has something")
+} // ERROR "has features avx[+]avx2$"
+
+// g runs a loop only when simd.HasAVX2() holds. The expected
+// diagnostics report avx+avx2 on the line of the check, on the lines
+// of the loop, and on the println line that follows the guarded region.
+func g() {
+       if simd.HasAVX2() { // ERROR "has features avx[+]avx2$"
+               for range 5 { // ERROR "has features avx[+]avx2$"
+                       if a < 0 { // ERROR "has features avx[+]avx2$"
+                               a++ // ERROR "has features avx[+]avx2$"
+                       }
+               }
+       }
+       println("ahoy!") // ERROR "has features avx[+]avx2$" // this is an artifact of flaky block numbering and why isn't it fused?
+       if a > 0 {
+               a--
+       }
+}
+
+// p always returns true. It is marked noinline, presumably so that the
+// conditions in hasIrreducibleLoop stay real calls whose result the
+// compiler cannot fold away (TODO confirm).
+//go:noinline
+func p() bool {
+       return true
+}
+
+// hasIrreducibleLoop builds a cycle between labels a and b that can be
+// entered at either label: a is reached from the simd.HasAVX2() branch
+// and from b, while b is reached from the else branch and by falling
+// through from a. The only expected diagnostic is avx+avx2 on the
+// "goto a" line.
+func hasIrreducibleLoop() {
+       if simd.HasAVX2() {
+               goto a // ERROR "has features avx[+]avx2$"
+       } else {
+               goto b
+       }
+a:
+       println("a")
+       if p() {
+               goto c
+       }
+b:
+       println("b")
+       if p() {
+               goto a
+       }
+c:
+       println("c")
+}