Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: account for rematerialized ops' output reginfo
author Junyang Shao <shaojunyang@google.com>
Tue, 12 Aug 2025 16:53:44 +0000 (16:53 +0000)
committer Junyang Shao <shaojunyang@google.com>
Thu, 14 Aug 2025 18:46:41 +0000 (11:46 -0700)
This CL implements the check for a rematerializeable value's output
regspec at its rematerialization site. It has some potential problems;
please see the TODO in regalloc.go.

Fixes #70451.

Change-Id: Ib624b967031776851136554719e939e9bf116b7c
Reviewed-on: https://go-review.googlesource.com/c/go/+/695315
Reviewed-by: David Chase <drchase@google.com>
TryBot-Bypass: David Chase <drchase@google.com>

src/cmd/compile/internal/ssa/func.go
src/cmd/compile/internal/ssa/func_test.go
src/cmd/compile/internal/ssa/regalloc.go
src/cmd/compile/internal/ssa/regalloc_test.go

index 213089a44b82330c5c9c6acf8c3b8fff430fd04a..0f895e501895363bb1eb4cec5926937c44bcf6e4 100644 (file)
@@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
                NamedValues:          make(map[LocalSlot][]*Value),
                CanonicalLocalSlots:  make(map[LocalSlot]*LocalSlot),
                CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),
+               OwnAux:               &AuxCall{},
        }
 }
 
index 6923aaa58ecadd4745f95c1648aaee6805015189..1372c77e7bd2f92ea77bb75a3fd5e4b5ff9761ce 100644 (file)
@@ -250,6 +250,11 @@ func Exit(arg string) ctrl {
        return ctrl{BlockExit, arg, []string{}}
 }
 
+// Ret specifies a BlockRet.
+func Ret(arg string) ctrl {
+       return ctrl{BlockRet, arg, []string{}}
+}
+
 // Eq specifies a BlockAMD64EQ.
 func Eq(cond, sub, alt string) ctrl {
        return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}
index 4e7f66581f8cc7bb5cbc505e0071884901ee700a..3e6fe0d128ae0d1df0d92a84532acf08e6f8ca34 100644 (file)
@@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
        } else if v.rematerializeable() {
                // Rematerialize instead of loading from the spill location.
                c = v.copyIntoWithXPos(s.curBlock, pos)
+               // We need to consider its output mask and potentially issue a Copy
+               // if there are register mask conflicts.
+               // This currently happens only for the SIMD package, between GP and FP
+               // registers, because Intel's vector extensions can put an integer value
+               // into an FP register, where it is seen as a vector. Example instruction:
+               // VPSLL[BWDQ]. Because GP and FP masks do not overlap,
+               // mask&sourceMask == 0 detects this situation fully.
+               sourceMask := s.regspec(c).outputs[0].regs
+               if mask&sourceMask == 0 && !onWasmStack {
+                       s.setOrig(c, v)
+                       s.assignReg(s.allocReg(sourceMask, v), v, c)
+                       // v.Type for the new OpCopy is likely wrong and it might delay the problem
+                       // until ssa to asm lowering, which might need the types to generate the right
+                       // assembly for OpCopy. For Intel's GP to FP move, it happens to be that
+                       // MOV instruction has such a variant so it happens to be right.
+                       // But it's unclear for other architectures or situations, and the problem
+                       // might be exposed when the assembler sees illegal instructions.
+                       // For now we still pick v.Type, because at least its size should be correct
+                       // for the rematerialization case the amd64 SIMD package exposed.
+                       // TODO: We might need to figure out a way to find the correct type or make
+                       // the asm lowering use reg info only for OpCopy.
+                       c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
+               }
        } else {
                // Load v from its spill location.
                spill := s.makeSpill(v, s.curBlock)
index 0f69b852d1297113ce1b3becae9925745c6fb965..79f94da0114f933068501f1de767902cd33b007c 100644 (file)
@@ -6,6 +6,7 @@ package ssa
 
 import (
        "cmd/compile/internal/types"
+       "cmd/internal/obj/x86"
        "fmt"
        "testing"
 )
@@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int {
        }
        return n
 }
+
+func TestRematerializeableRegCompatible(t *testing.T) {
+       c := testConfig(t)
+       f := c.Fun("entry",
+               Bloc("entry",
+                       Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+                       Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil),
+                       Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"),
+                       Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"),
+                       Ret("res"),
+               ),
+       )
+       regalloc(f.f)
+       checkFunc(f.f)
+       moveFound := false
+       for _, v := range f.f.Blocks[0].Values {
+               if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 {
+                       moveFound = true
+               }
+       }
+       if !moveFound {
+               t.Errorf("Expects an Copy to be issued, but got: %+v", f.f)
+       }
+}