From 908e3e8166898a3b5f7c961e774f681da2a765bc Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Thu, 14 Aug 2025 16:56:28 +0000
Subject: [PATCH] [dev.simd] cmd/compile: make (most) move/load/store lowering
 use reg and width only

This CL tries to clean up the move/load/store lowering a bit. After
CL 695315 the register information for instructions is expected to be
correct for SIMD, but we still need to pick the right instruction
during SSA-to-asm lowering. The code before this CL worked correctly,
but the MOVSSconst and MOVSDconst cases contained duplicated code;
this CL removes the duplication.

This CL also rewrites move/load/storeByTypeAndReg to use only the
width and register for all non-SIMD types, which is more consistent.

Change-Id: I76c14f3d0140bcbd4fbea0df275fee0202a3b7d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/696175
Reviewed-by: David Chase
LUCI-TryBot-Result: Go LUCI
---
 src/cmd/compile/internal/amd64/ssa.go | 118 ++++++++------------------
 1 file changed, 35 insertions(+), 83 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 25eca691b5..56d0ab2867 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -63,6 +63,7 @@ func loadByTypeAndReg(t *types.Type, r int16) obj.As {
 }
 
 // storeByTypeAndReg returns the store instruction of the given type/register.
+// It's also used for loading const to a reg.
 func storeByTypeAndReg(t *types.Type, r int16) obj.As {
 	width := t.Size()
 	if t.IsSIMD() {
@@ -75,67 +76,38 @@ func storeByTypeAndReg(t *types.Type, r int16) obj.As {
 		case 8:
 			return x86.AMOVSD
 		}
-	} else {
-		switch width {
-		case 1:
-			return x86.AMOVB
-		case 2:
-			return x86.AMOVW
-		case 4:
-			return x86.AMOVL
-		case 8:
-			return x86.AMOVQ
-		case 16:
-			return x86.AMOVUPS
-		}
+	}
+	switch width {
+	case 1:
+		return x86.AMOVB
+	case 2:
+		return x86.AMOVW
+	case 4:
+		return x86.AMOVL
+	case 8:
+		return x86.AMOVQ
+	case 16:
+		return x86.AMOVUPS
 	}
 	panic(fmt.Sprintf("bad store type %v", t))
 }
 
-// storeByType returns the store instruction of the given type.
-func storeByType(t *types.Type) obj.As {
+// moveByTypeAndReg returns the reg->reg move instruction of the given type/registers.
+func moveByTypeAndReg(t *types.Type, dest, src int16) obj.As {
 	width := t.Size()
-	if t.IsFloat() {
-		switch width {
-		case 4:
-			return x86.AMOVSS
-		case 8:
-			return x86.AMOVSD
-		}
-	} else if t.IsSIMD() {
-		return simdMov(width)
-	} else {
-		switch width {
-		case 1:
-			return x86.AMOVB
-		case 2:
-			return x86.AMOVW
-		case 4:
-			return x86.AMOVL
-		case 8:
-			return x86.AMOVQ
-		case 16:
-			return x86.AMOVUPS
-		}
+	if t.IsSIMD() {
+		return simdMov(t.Size())
 	}
-	panic(fmt.Sprintf("bad store type %v", t))
-}
-
-// moveByType returns the reg->reg move instruction of the given type.
-func moveByType(from, to *ssa.Value) obj.As {
-	toT := to.Type
-	fromR, toR := from.Reg(), to.Reg()
-	if isFPReg(fromR) && isFPReg(toR) && toT.IsFloat() {
+	// fp -> fp
+	if isFPReg(dest) && isFPReg(src) {
 		// Moving the whole sse2 register is faster
 		// than moving just the correct low portion of it.
 		// There is no xmm->xmm move with 1 byte opcode,
 		// so use movups, which has 2 byte opcode.
 		return x86.AMOVUPS
 	}
-	if toT.IsSIMD() {
-		return simdMov(toT.Size())
-	}
-	switch toT.Size() {
+	// gp -> fp, fp -> gp, gp -> gp
+	switch width {
 	case 1:
 		// Avoids partial register write
 		return x86.AMOVL
@@ -147,9 +119,8 @@ func moveByType(from, to *ssa.Value) obj.As {
 		return x86.AMOVQ
 	case 16:
 		return x86.AMOVUPS // int128s are in SSE registers
-	default:
-		panic(fmt.Sprintf("bad int register width %d:%v", toT.Size(), toT))
 	}
+	panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
 }
 
 // opregreg emits instructions for
@@ -645,7 +616,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		// But this requires a way for regalloc to know that SRC might be
 		// clobbered by this instruction.
 		t := v.RegTmp()
-		opregreg(s, moveByType(v.Args[1], v), t, v.Args[1].Reg())
+		opregreg(s, moveByTypeAndReg(v.Type, t, v.Args[1].Reg()), t, v.Args[1].Reg())
 
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
@@ -820,34 +791,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 
 	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
 		x := v.Reg()
-		a := v.Op.Asm()
-		if x < x86.REG_X0 { // not an FP register
-			if v.AuxInt == 0 && v.Aux == nil {
-				opregreg(s, x86.AXORL, x, x)
-				break
-			}
-			c := v.AuxInt
-			switch v.Type.Size() {
-			case 4:
-				a = x86.AMOVL
-				c = int64(math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
-			case 8:
-				a = x86.AMOVQ
-			default:
-				panic(fmt.Sprintf("unexpected type width for float const into non-float register, %v", v))
-			}
-			p := s.Prog(a)
-			p.From.Type = obj.TYPE_CONST
-			p.From.Offset = c
-			p.To.Type = obj.TYPE_REG
-			p.To.Reg = x
-		} else {
-			p := s.Prog(a)
-			p.From.Type = obj.TYPE_FCONST
-			p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-			p.To.Type = obj.TYPE_REG
-			p.To.Reg = x
+		if !isFPReg(x) && v.AuxInt == 0 && v.Aux == nil {
+			opregreg(s, x86.AXORL, x, x)
+			break
 		}
+		p := s.Prog(storeByTypeAndReg(v.Type, x))
+		p.From.Type = obj.TYPE_FCONST
+		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = x
 	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
 		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
 		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
@@ -1245,7 +1197,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 			y = simdOrMaskReg(v)
 		}
 		if x != y {
-			opregreg(s, moveByType(v.Args[0], v), y, x)
+			opregreg(s, moveByTypeAndReg(v.Type, y, x), y, x)
 		}
 	case ssa.OpLoadReg:
 		if v.Type.IsFlags() {
@@ -1270,7 +1222,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		if v.Type.IsSIMD() {
 			r = simdOrMaskReg(v.Args[0])
 		}
-		p := s.Prog(storeByType(v.Type))
+		p := s.Prog(storeByTypeAndReg(v.Type, r))
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = r
 		ssagen.AddrAuto(&p.To, v)
@@ -1287,7 +1239,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
 			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
 			s.FuncInfo().AddSpill(
-				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByTypeAndReg(ap.Type, ap.Reg), Spill: storeByType(ap.Type)})
+				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByTypeAndReg(ap.Type, ap.Reg), Spill: storeByTypeAndReg(ap.Type, ap.Reg)})
 		}
 		v.Block.Func.RegArgs = nil
 		ssagen.CheckArgReg(v)
@@ -2182,7 +2134,7 @@ func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir
 }
 
 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
-	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
+	p = pp.Append(p, storeByTypeAndReg(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
 	p.To.Name = obj.NAME_PARAM
 	p.To.Sym = n.Linksym()
 	p.Pos = p.Pos.WithNotStmt()
-- 
2.52.0
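
Appendix: below is a minimal standalone sketch of the width-keyed selection shape that storeByTypeAndReg has after this CL, for readers who want to exercise the logic outside the compiler tree. It is not the compiler's code: the string mnemonics and the isSIMD/isFPReg bool parameters are illustrative stand-ins for the x86.A* opcode constants, t.IsSIMD(), and the real isFPReg(r) register check.

package main

import "fmt"

// storeMnemonic mirrors the post-CL structure: a SIMD check first,
// then an FP-register intercept for 4- and 8-byte floats, then one
// shared switch on width for everything else.
func storeMnemonic(width int64, isSIMD, isFPReg bool) string {
	if isSIMD {
		return "SIMDMOV" // the real code defers to simdMov(width)
	}
	if isFPReg {
		switch width {
		case 4:
			return "MOVSS"
		case 8:
			return "MOVSD"
		}
	}
	// Shared fall-through: GP-register stores plus the 16-byte
	// int128-in-SSE case, no longer duplicated in an else branch.
	switch width {
	case 1:
		return "MOVB"
	case 2:
		return "MOVW"
	case 4:
		return "MOVL"
	case 8:
		return "MOVQ"
	case 16:
		return "MOVUPS"
	}
	panic(fmt.Sprintf("bad store width %d", width))
}

func main() {
	fmt.Println(storeMnemonic(8, false, false))  // MOVQ: 8-byte store from a GP register
	fmt.Println(storeMnemonic(8, false, true))   // MOVSD: 8-byte store from an FP register
	fmt.Println(storeMnemonic(16, false, false)) // MOVUPS: int128 lives in an SSE register
}

The point of the fall-through is that the FP switch only intercepts the 4- and 8-byte FP-register cases; every other combination lands in the single width switch, which is what lets the CL delete the duplicated else branch and the separate storeByType helper.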