From 4fda27c0cc5566f945adc6de88de294a3387830a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 24 Jun 2025 03:59:30 +0000 Subject: [PATCH] [dev.simd] cmd/compile: glue codes for Shift and Rotate This CL adds two more intrinsic lowering functions. They can issue an OpCopy to move a scalar value to vector value. This is needed by Shift and Rotate APIs. Change-Id: I8a83197d33207072c4a9221a931e67dddd5cd0bf Reviewed-on: https://go-review.googlesource.com/c/go/+/683476 Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/compile/internal/amd64/ssa.go | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 82226ec1cd..1d90da2375 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -1540,6 +1540,21 @@ func simdFp21(s *ssagen.State, v *ssa.Value) *obj.Prog { return p } +// This function is to accustomize the shifts. +// The 2nd arg is an XMM, and this function merely checks that. +// Example instruction: VPSLLQ Z1, X1, Z2 +func simdFpXfp(s *ssagen.State, v *ssa.Value) *obj.Prog { + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + // Vector registers operands follows a right-to-left order. + // e.g. VPSUBD X1, X2, X3 means X3 = X2 - X1. + p.From.Reg = v.Args[1].Reg() + p.AddRestSourceReg(simdReg(v.Args[0])) + p.To.Type = obj.TYPE_REG + p.To.Reg = simdReg(v) + return p +} + // Example instruction: VPCMPEQW Z26, Z30, K4 func simdFp2k(s *ssagen.State, v *ssa.Value) *obj.Prog { // simdReg handles mask and vector registers altogether @@ -1563,6 +1578,20 @@ func simdFp2kfp(s *ssagen.State, v *ssa.Value) *obj.Prog { return p } +// This function is to accustomize the shifts. +// The 2nd arg is an XMM, and this function merely checks that. +// Example instruction: VPSLLQ Z1, X1, K1, Z2 +func simdFpXkfp(s *ssagen.State, v *ssa.Value) *obj.Prog { + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + p.AddRestSourceReg(simdReg(v.Args[0])) + p.AddRestSourceReg(simdReg(v.Args[2])) + p.To.Type = obj.TYPE_REG + p.To.Reg = simdReg(v) + return p +} + // Example instruction: VPCMPEQW Z26, Z30, K1, K4 func simdFp2kk(s *ssagen.State, v *ssa.Value) *obj.Prog { return simdFp2kfp(s, v) @@ -1664,6 +1693,10 @@ func simdFp2kkImm8(s *ssagen.State, v *ssa.Value) *obj.Prog { return p } +func simdFp2kfpImm8(s *ssagen.State, v *ssa.Value) *obj.Prog { + return simdFp2kkImm8(s, v) +} + // Example instruction: VFMADD213PD Z2, Z1, Z0 func simdFp31ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog { p := s.Prog(v.Op.Asm()) @@ -1834,6 +1867,17 @@ func simdReg(v *ssa.Value) int16 { panic("unreachable") } +// XXX this is used for shift operations only. +// regalloc will issue OpCopy with incorrect type, but the assigned +// register should be correct, and this function is merely checking +// the sanity of this part. +func simdCheckRegOnly(v *ssa.Value, regStart, regEnd int16) int16 { + if v.Reg() > regEnd || v.Reg() < regStart { + panic("simdCheckRegOnly: not the desired register") + } + return v.Reg() +} + func simdMov(width int64) obj.As { if width >= 64 { return x86.AVMOVDQU64 -- 2.52.0