From 0d8cb89f5c5acd69c6c9fc600c251cf880010e2d Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 16:26:47 -0400 Subject: [PATCH] [dev.simd] cmd/compile: support simd(imm,fp) returns gp These changes are required to make gp-returning simd ops work. amd64/ssa.go includes a new code generator helper, gc/main.go initializes intrinsics AFTER the types, ssa/_gen/*AMD64.go add another register shape to the simd ops function. This CL should be submitted after simdgen CL 683858 which generated some of the changes. Change-Id: I0af752ba8882fa131b875ff9c741ef70afbc60d1 Reviewed-on: https://go-review.googlesource.com/c/go/+/683816 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- src/cmd/compile/internal/amd64/ssa.go | 14 ++++++++++++++ src/cmd/compile/internal/gc/main.go | 6 +++++- src/cmd/compile/internal/ssa/_gen/AMD64Ops.go | 2 +- src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go | 2 +- src/simd/stubs_amd64.go | 4 ++-- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 1d90da2375..0c9d12620a 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -1720,6 +1720,20 @@ func simdFp3kfpResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog { return p } +func simdFpgpImm8(s *ssagen.State, v *ssa.Value) *obj.Prog { + p := s.Prog(v.Op.Asm()) + imm := v.AuxInt + if imm < 0 || imm > 255 { + v.Fatalf("Invalid source selection immediate") + } + p.From.Offset = imm + p.From.Type = obj.TYPE_CONST + p.AddRestSourceReg(simdReg(v.Args[0])) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + return p +} + // Currently unused func simdFp31(s *ssagen.State, v *ssa.Value) *obj.Prog { p := s.Prog(v.Op.Asm()) diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index c486920f5b..20899df04d 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -191,7 +191,6 @@ func Main(archInit func(*ssagen.ArchInfo)) { ir.IsIntrinsicSym = ssagen.IsIntrinsicSym inline.SSADumpInline = ssagen.DumpInline ssagen.InitEnv() - ssagen.InitTables() types.PtrSize = ssagen.Arch.LinkArch.PtrSize types.RegSize = ssagen.Arch.LinkArch.RegSize @@ -205,6 +204,11 @@ func Main(archInit func(*ssagen.ArchInfo)) { typecheck.InitRuntime() rttype.Init() + // Some intrinsics (notably, the simd intrinsics) mention + // types "eagerly", thus ssagen must be initialized AFTER + // the type system is ready. + ssagen.InitTables() + // Parse and typecheck input. noder.LoadPackage(flag.Args()) diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index e2cbc65957..9ff77736f0 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -1301,7 +1301,7 @@ func init() { pkg: "cmd/internal/obj/x86", genfile: "../../amd64/ssa.go", genSIMDfile: "../../amd64/simdssa.go", - ops: append(AMD64ops, simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp3fp1, fp3k1fp1, fp1gp1fp1)...), // AMD64ops, + ops: append(AMD64ops, simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp3fp1, fp3k1fp1, fp1gp1fp1, fpgp)...), // AMD64ops, blocks: AMD64blocks, regnames: regNamesAMD64, ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11", diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 9f82309463..88d90c2f85 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -1,7 +1,7 @@ // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { return []opData{ {name: "VADDPS512", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VANDPS512", argLength: 2, reg: fp21, asm: "VANDPS", commutative: true, typ: "Vec512", resultInArg0: false}, diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index ceccf1cf61..66ff8c545e 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -7257,7 +7257,7 @@ func (x Int16x8) SetElem(imm uint8, y int16) Int16x8 // SetElem sets a single constant-indexed element's value. // // Asm: VPINSRD, CPU Feature: AVX -func (x Int32x4) SetElem(imm uint8, y int8) Int32x4 +func (x Int32x4) SetElem(imm uint8, y int32) Int32x4 // SetElem sets a single constant-indexed element's value. // @@ -7277,7 +7277,7 @@ func (x Uint16x8) SetElem(imm uint8, y uint16) Uint16x8 // SetElem sets a single constant-indexed element's value. // // Asm: VPINSRD, CPU Feature: AVX -func (x Uint32x4) SetElem(imm uint8, y uint8) Uint32x4 +func (x Uint32x4) SetElem(imm uint8, y uint32) Uint32x4 // SetElem sets a single constant-indexed element's value. // -- 2.52.0