From 10e796884905d23ab2419cc158769e8fdc73de4e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 12 Aug 2025 16:53:44 +0000 Subject: [PATCH] cmd/compile: accounts rematerialize ops's output reginfo This CL implements the check for rematerializeable value's output regspec at its remateralization site. It has some potential problems, please see the TODO in regalloc.go. Fixes #70451. Cherry-picked from the dev.simd branch. This CL is not necessarily SIMD specific. Apply early to reduce risk. Change-Id: Ib624b967031776851136554719e939e9bf116b7c Reviewed-on: https://go-review.googlesource.com/c/go/+/695315 Reviewed-by: David Chase TryBot-Bypass: David Chase Reviewed-on: https://go-review.googlesource.com/c/go/+/708857 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ssa/func.go | 1 + src/cmd/compile/internal/ssa/func_test.go | 5 ++++ src/cmd/compile/internal/ssa/regalloc.go | 23 +++++++++++++++++ src/cmd/compile/internal/ssa/regalloc_test.go | 25 +++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 5736f0b812..fc8cb3f2fe 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func { NamedValues: make(map[LocalSlot][]*Value), CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot), CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot), + OwnAux: &AuxCall{}, } } diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go index a1e639e048..4639d674e1 100644 --- a/src/cmd/compile/internal/ssa/func_test.go +++ b/src/cmd/compile/internal/ssa/func_test.go @@ -250,6 +250,11 @@ func Exit(arg string) ctrl { return ctrl{BlockExit, arg, []string{}} } +// Ret specifies a BlockRet. +func Ret(arg string) ctrl { + return ctrl{BlockRet, arg, []string{}} +} + // Eq specifies a BlockAMD64EQ. func Eq(cond, sub, alt string) ctrl { return ctrl{BlockAMD64EQ, cond, []string{sub, alt}} diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 1ce85a8f63..56f9e550b2 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos } else if v.rematerializeable() { // Rematerialize instead of loading from the spill location. c = v.copyIntoWithXPos(s.curBlock, pos) + // We need to consider its output mask and potentially issue a Copy + // if there are register mask conflicts. + // This currently happens for the SIMD package only between GP and FP + // register. Because Intel's vector extension can put integer value into + // FP, which is seen as a vector. Example instruction: VPSLL[BWDQ] + // Because GP and FP masks do not overlap, mask & outputMask == 0 + // detects this situation thoroughly. + sourceMask := s.regspec(c).outputs[0].regs + if mask&sourceMask == 0 && !onWasmStack { + s.setOrig(c, v) + s.assignReg(s.allocReg(sourceMask, v), v, c) + // v.Type for the new OpCopy is likely wrong and it might delay the problem + // until ssa to asm lowering, which might need the types to generate the right + // assembly for OpCopy. For Intel's GP to FP move, it happens to be that + // MOV instruction has such a variant so it happens to be right. + // But it's unclear for other architectures or situations, and the problem + // might be exposed when the assembler sees illegal instructions. + // Right now make we still pick v.Type, because at least its size should be correct + // for the rematerialization case the amd64 SIMD package exposed. + // TODO: We might need to figure out a way to find the correct type or make + // the asm lowering use reg info only for OpCopy. + c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c) + } } else { // Load v from its spill location. spill := s.makeSpill(v, s.curBlock) diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go index 0f69b852d1..79f94da011 100644 --- a/src/cmd/compile/internal/ssa/regalloc_test.go +++ b/src/cmd/compile/internal/ssa/regalloc_test.go @@ -6,6 +6,7 @@ package ssa import ( "cmd/compile/internal/types" + "cmd/internal/obj/x86" "fmt" "testing" ) @@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int { } return n } + +func TestRematerializeableRegCompatible(t *testing.T) { + c := testConfig(t) + f := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil), + Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"), + Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"), + Ret("res"), + ), + ) + regalloc(f.f) + checkFunc(f.f) + moveFound := false + for _, v := range f.f.Blocks[0].Values { + if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 { + moveFound = true + } + } + if !moveFound { + t.Errorf("Expects an Copy to be issued, but got: %+v", f.f) + } +} -- 2.52.0