From: Junyang Shao Date: Tue, 12 Aug 2025 16:53:44 +0000 (+0000) Subject: [dev.simd] cmd/compile: accounts rematerialize ops's output reginfo X-Git-Tag: go1.26rc1~147^2~123 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9783f86bc8;p=gostls13.git [dev.simd] cmd/compile: accounts rematerialize ops's output reginfo This CL implements the check for rematerializeable value's output regspec at its remateralization site. It has some potential problems, please see the TODO in regalloc.go. Fixes #70451. Change-Id: Ib624b967031776851136554719e939e9bf116b7c Reviewed-on: https://go-review.googlesource.com/c/go/+/695315 Reviewed-by: David Chase TryBot-Bypass: David Chase --- diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 213089a44b..0f895e5018 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func { NamedValues: make(map[LocalSlot][]*Value), CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot), CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot), + OwnAux: &AuxCall{}, } } diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go index 6923aaa58e..1372c77e7b 100644 --- a/src/cmd/compile/internal/ssa/func_test.go +++ b/src/cmd/compile/internal/ssa/func_test.go @@ -250,6 +250,11 @@ func Exit(arg string) ctrl { return ctrl{BlockExit, arg, []string{}} } +// Ret specifies a BlockRet. +func Ret(arg string) ctrl { + return ctrl{BlockRet, arg, []string{}} +} + // Eq specifies a BlockAMD64EQ. func Eq(cond, sub, alt string) ctrl { return ctrl{BlockAMD64EQ, cond, []string{sub, alt}} diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 4e7f66581f..3e6fe0d128 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos } else if v.rematerializeable() { // Rematerialize instead of loading from the spill location. c = v.copyIntoWithXPos(s.curBlock, pos) + // We need to consider its output mask and potentially issue a Copy + // if there are register mask conflicts. + // This currently happens for the SIMD package only between GP and FP + // register. Because Intel's vector extension can put integer value into + // FP, which is seen as a vector. Example instruction: VPSLL[BWDQ] + // Because GP and FP masks do not overlap, mask & outputMask == 0 + // detects this situation thoroughly. + sourceMask := s.regspec(c).outputs[0].regs + if mask&sourceMask == 0 && !onWasmStack { + s.setOrig(c, v) + s.assignReg(s.allocReg(sourceMask, v), v, c) + // v.Type for the new OpCopy is likely wrong and it might delay the problem + // until ssa to asm lowering, which might need the types to generate the right + // assembly for OpCopy. For Intel's GP to FP move, it happens to be that + // MOV instruction has such a variant so it happens to be right. + // But it's unclear for other architectures or situations, and the problem + // might be exposed when the assembler sees illegal instructions. + // Right now make we still pick v.Type, because at least its size should be correct + // for the rematerialization case the amd64 SIMD package exposed. + // TODO: We might need to figure out a way to find the correct type or make + // the asm lowering use reg info only for OpCopy. + c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c) + } } else { // Load v from its spill location. spill := s.makeSpill(v, s.curBlock) diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go index 0f69b852d1..79f94da011 100644 --- a/src/cmd/compile/internal/ssa/regalloc_test.go +++ b/src/cmd/compile/internal/ssa/regalloc_test.go @@ -6,6 +6,7 @@ package ssa import ( "cmd/compile/internal/types" + "cmd/internal/obj/x86" "fmt" "testing" ) @@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int { } return n } + +func TestRematerializeableRegCompatible(t *testing.T) { + c := testConfig(t) + f := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil), + Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"), + Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"), + Ret("res"), + ), + ) + regalloc(f.f) + checkFunc(f.f) + moveFound := false + for _, v := range f.f.Blocks[0].Values { + if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 { + moveFound = true + } + } + if !moveFound { + t.Errorf("Expects an Copy to be issued, but got: %+v", f.f) + } +}