[dev.simd] cmd/compile: fix isIntrinsic for methods; fix fp <-> gp moves
author David Chase <drchase@google.com>
Thu, 7 Aug 2025 20:44:50 +0000 (16:44 -0400)
committer David Chase <drchase@google.com>
Wed, 13 Aug 2025 18:48:08 +0000 (11:48 -0700)
Also includes a handy debugging hook for the inliner.

Change-Id: I23d0619506219d21db78c6c801612ff058562142
Reviewed-on: https://go-review.googlesource.com/c/go/+/694118
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/inline/inl.go
src/cmd/compile/internal/ssagen/intrinsics.go

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index d3fae7ce14c8e6696857951116eac36db3a4b4e6..38815929d2520f2f70528eb1c7abf17eb1274ea2 100644
@@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
        }
 }
 
+func isFPReg(r int16) bool {
+       return x86.REG_X0 <= r && r <= x86.REG_Z31
+}
+
 // loadByType returns the load instruction of the given type.
 func loadByType(t *types.Type) obj.As {
        // Avoid partial register write
@@ -88,31 +92,33 @@ func storeByType(t *types.Type) obj.As {
 }
 
 // moveByType returns the reg->reg move instruction of the given type.
-func moveByType(t *types.Type) obj.As {
-       if t.IsFloat() {
+func moveByType(from, to *ssa.Value) obj.As {
+       toT := to.Type
+       fromR, toR := from.Reg(), to.Reg()
+       if isFPReg(fromR) && isFPReg(toR) && toT.IsFloat() {
                // Moving the whole sse2 register is faster
                // than moving just the correct low portion of it.
                // There is no xmm->xmm move with 1 byte opcode,
                // so use movups, which has 2 byte opcode.
                return x86.AMOVUPS
-       } else if t.IsSIMD() {
-               return simdMov(t.Size())
-       } else {
-               switch t.Size() {
-               case 1:
-                       // Avoids partial register write
-                       return x86.AMOVL
-               case 2:
-                       return x86.AMOVL
-               case 4:
-                       return x86.AMOVL
-               case 8:
-                       return x86.AMOVQ
-               case 16:
-                       return x86.AMOVUPS // int128s are in SSE registers
-               default:
-                       panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
-               }
+       }
+       if toT.IsSIMD() {
+               return simdMov(toT.Size())
+       }
+       switch toT.Size() {
+       case 1:
+               // Avoids partial register write
+               return x86.AMOVL
+       case 2:
+               return x86.AMOVL
+       case 4:
+               return x86.AMOVL
+       case 8:
+               return x86.AMOVQ
+       case 16:
+               return x86.AMOVUPS // int128s are in SSE registers
+       default:
+               panic(fmt.Sprintf("bad int register width %d:%v", toT.Size(), toT))
        }
 }
 
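Why moveByType now takes two *ssa.Values instead of a *types.Type: per the commit title's "fp <-> gp moves", a value's bits may sit in a register whose class does not match its type, so the correct reg->reg opcode depends on the register classes of both operands, not on the type alone. Below is a minimal standalone model of that dispatch; regClass, moveOp, and the opcode strings are illustrative stand-ins, not the compiler's actual types, and the SIMD and mask-register cases are omitted.

package main

import "fmt"

type regClass int

const (
	gp regClass = iota // general-purpose register
	fp                 // SSE/AVX (X0..Z31) register
)

// moveOp mirrors the shape of the new moveByType: MOVUPS applies only
// when both operands are FP registers holding a float; float bits that
// sit in GP registers travel via plain integer moves instead.
func moveOp(from, to regClass, isFloat bool, size int64) string {
	if from == fp && to == fp && isFloat {
		return "MOVUPS"
	}
	switch size {
	case 1, 2, 4:
		return "MOVL" // avoids partial register writes
	case 8:
		return "MOVQ"
	}
	return "MOVUPS" // 16-byte int128s live in SSE registers
}

func main() {
	fmt.Println(moveOp(fp, fp, true, 8)) // MOVUPS
	fmt.Println(moveOp(gp, gp, true, 8)) // MOVQ: float64 bits in a GP register
}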
@@ -648,7 +654,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                // But this requires a way for regalloc to know that SRC might be
                // clobbered by this instruction.
                t := v.RegTmp()
-               opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
+               opregreg(s, moveByType(v.Args[1], v), t, v.Args[1].Reg())
 
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
@@ -820,13 +826,37 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.From.Offset = v.AuxInt
                p.To.Type = obj.TYPE_REG
                p.To.Reg = x
+
        case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
                x := v.Reg()
-               p := s.Prog(v.Op.Asm())
-               p.From.Type = obj.TYPE_FCONST
-               p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = x
+               a := v.Op.Asm()
+               if x < x86.REG_X0 { // not an FP register
+                       if v.AuxInt == 0 && v.Aux == nil {
+                               opregreg(s, x86.AXORL, x, x)
+                               break
+                       }
+                       c := v.AuxInt
+                       switch v.Type.Size() {
+                       case 4:
+                               a = x86.AMOVL
+                               c = int64(math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
+                       case 8:
+                               a = x86.AMOVQ
+                       default:
+                               panic(fmt.Sprintf("unexpected type width for float const into non-float register, %v", v))
+                       }
+                       p := s.Prog(a)
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = c
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = x
+               } else {
+                       p := s.Prog(a)
+                       p.From.Type = obj.TYPE_FCONST
+                       p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = x
+               }
        case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
                ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
                ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
@@ -1134,7 +1164,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                        y = simdOrMaskReg(v)
                }
                if x != y {
-                       opregreg(s, moveByType(v.Type), y, x)
+                       opregreg(s, moveByType(v.Args[0], v), y, x)
                }
        case ssa.OpLoadReg:
                if v.Type.IsFlags() {
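The MOVSSconst/MOVSDconst change above materializes a float constant into a general-purpose register by moving its raw bit pattern with an integer move (or XORL when the constant is zero and there is no Aux symbol). AuxInt always carries float64 bits, so the 4-byte case must first narrow the value to float32 before taking its bits. A runnable sketch of just that conversion, using only the standard library:

package main

import (
	"fmt"
	"math"
)

func main() {
	// The SSA backend stores float constants as float64 bits in AuxInt.
	aux := int64(math.Float64bits(1.5))

	// 8-byte case: the bits are used as-is (MOVQ $c, reg).
	c64 := aux

	// 4-byte case: narrow to float32 first, then take its bits (MOVL $c, reg).
	c32 := int64(math.Float32bits(float32(math.Float64frombits(uint64(aux)))))

	fmt.Printf("float64 bits: %#016x\n", uint64(c64))
	fmt.Printf("float32 bits: %#08x\n", uint32(c32))
}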
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index c06f76fe9ff029158ef84a102b61f297a7a71557..1ba8350803052ec2ba34d54d4fd4c1c5e80c3ed4 100644
@@ -202,6 +202,7 @@ func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose bool
                // be very liberal here, if the closure is only called once, the budget is large
                budget = max(budget, inlineClosureCalledOnceCost)
        }
+
        return budget
 }
 
@@ -263,6 +264,7 @@ func CanInline(fn *ir.Func, profile *pgoir.Profile) {
 
        visitor := hairyVisitor{
                curFunc:       fn,
+               debug:         isDebugFn(fn),
                isBigFunc:     IsBigFunc(fn),
                budget:        budget,
                maxBudget:     budget,
@@ -407,6 +409,7 @@ type hairyVisitor struct {
        // This is needed to access the current caller in the doNode function.
        curFunc       *ir.Func
        isBigFunc     bool
+       debug         bool
        budget        int32
        maxBudget     int32
        reason        string
@@ -416,6 +419,16 @@ type hairyVisitor struct {
        profile       *pgoir.Profile
 }
 
+func isDebugFn(fn *ir.Func) bool {
+       // if n := fn.Nname; n != nil && n.Sym().Pkg.Path == "0" {
+       //      if n.Sym().Name == "BroadcastInt64x4" {
+       //              fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
+       //              return true
+       //      }
+       // }
+       return false
+}
+
 func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
        v.do = v.doNode // cache closure
        if ir.DoChildren(fn, v.do) {
@@ -434,6 +447,9 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
        if n == nil {
                return false
        }
+       if v.debug {
+               fmt.Printf("%v: doNode %v budget is %d\n", ir.Line(n), n.Op(), v.budget)
+       }
 opSwitch:
        switch n.Op() {
        // Call is okay if inlinable and we have the budget for the body.
@@ -551,12 +567,19 @@ opSwitch:
                }
 
                if cheap {
+                       if v.debug {
+                               if ir.IsIntrinsicCall(n) {
+                                       fmt.Printf("%v: cheap call is also intrinsic, %v\n", ir.Line(n), n)
+                               }
+                       }
                        break // treat like any other node, that is, cost of 1
                }
 
                if ir.IsIntrinsicCall(n) {
-                       // Treat like any other node.
-                       break
+                       if v.debug {
+                               fmt.Printf("%v: intrinsic call, %v\n", ir.Line(n), n)
+                       }
+                       break // Treat like any other node.
                }
 
                if callee := inlCallee(v.curFunc, n.Fun, v.profile, false); callee != nil && typecheck.HaveInlineBody(callee) {
@@ -583,6 +606,10 @@ opSwitch:
                        }
                }
 
+               if v.debug {
+                       fmt.Printf("%v: costly OCALLFUNC %v\n", ir.Line(n), n)
+               }
+
                // Call cost for non-leaf inlining.
                v.budget -= extraCost
 
@@ -592,6 +619,9 @@ opSwitch:
        // Things that are too hairy, irrespective of the budget
        case ir.OCALL, ir.OCALLINTER:
                // Call cost for non-leaf inlining.
+               if v.debug {
+                       fmt.Printf("%v: costly OCALL %v\n", ir.Line(n), n)
+               }
                v.budget -= v.extraCallCost
 
        case ir.OPANIC:
@@ -743,7 +773,7 @@ opSwitch:
        v.budget--
 
        // When debugging, don't stop early, to get full cost of inlining this function
-       if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
+       if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() && !v.debug {
                v.reason = "too expensive"
                return true
        }
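To use the debugging hook, fill in isDebugFn with the package path and function name you want to trace; the commented-out template in the hunk above shows the intended shape. A sketch of an enabled version (the path and name here are placeholders to substitute, and this edit only makes sense inside the compiler tree, not as a standalone program):

func isDebugFn(fn *ir.Func) bool {
	if n := fn.Nname; n != nil && n.Sym().Pkg.Path == "example.com/simd" { // placeholder path
		return n.Sym().Name == "BroadcastInt64x4" // name from the template; substitute your own
	}
	return false
}

When isDebugFn returns true, doNode prints the per-node budget and the intrinsic/costly-call classifications, and the "too expensive" early exit is suppressed (note the !v.debug added to the cutoff condition), so the full inlining cost of the function is reported.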
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index ee03075f524af8fcf5d21315de65a1c72ce51010..f5b5b9bb7cd82874e57f27a2f038cbaf4f781d61 100644
@@ -1913,6 +1913,13 @@ func IsIntrinsicCall(n *ir.CallExpr) bool {
        }
        name, ok := n.Fun.(*ir.Name)
        if !ok {
+               if n.Fun.Op() == ir.OMETHEXPR {
+                       if meth := ir.MethodExprName(n.Fun); meth != nil {
+                               if fn := meth.Func; fn != nil {
+                                       return IsIntrinsicSym(fn.Sym())
+                               }
+                       }
+               }
                return false
        }
        return IsIntrinsicSym(name.Sym())
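For context on this fix: a call made through a method expression has an OMETHEXPR callee rather than an *ir.Name, so the old code fell through to return false and intrinsic methods lost their intrinsic status when called that way; the new code resolves the method via ir.MethodExprName before checking IsIntrinsicSym. A runnable illustration of what a method-expression call looks like at the source level (the type and method are invented for the example):

package main

import "fmt"

type Point struct{ X, Y int }

func (p Point) Dot(q Point) int { return p.X*q.X + p.Y*q.Y }

func main() {
	p := Point{1, 2}

	// Ordinary method call: inside the compiler, the callee is a name.
	fmt.Println(p.Dot(Point{3, 4})) // 11

	// Call through the method expression: the method becomes an ordinary
	// function with the receiver as its first argument, and the callee
	// node inside the compiler is an OMETHEXPR, not an *ir.Name.
	fmt.Println(Point.Dot(p, Point{3, 4})) // 11
}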