From 7fdb1da6b0e4d5f5803240024a8ca201d9f5f9aa Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 21 Aug 2025 04:33:46 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: complete truncating u?int conversions. Downsizing conversions' truncating version complete. Saturation ver not done. Change-Id: I710976c2b5329e2882763d60fcef2a827213df09 Reviewed-on: https://go-review.googlesource.com/c/go/+/697975 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/amd64/simdssa.go | 27 + .../compile/internal/ssa/_gen/simdAMD64.rules | 36 ++ .../compile/internal/ssa/_gen/simdAMD64ops.go | 18 + .../internal/ssa/_gen/simdgenericOps.go | 36 ++ src/cmd/compile/internal/ssa/opGen.go | 477 ++++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 108 ++++ .../compile/internal/ssagen/simdintrinsics.go | 36 ++ src/simd/_gen/simdgen/godefs.go | 4 + .../_gen/simdgen/ops/Converts/categories.yaml | 12 +- src/simd/_gen/simdgen/ops/Converts/go.yaml | 49 +- src/simd/ops_amd64.go | 256 ++++++++++ 11 files changed, 1055 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 8674866df3..e5ff346011 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -41,8 +41,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBROADCASTW512, ssa.OpAMD64VPBROADCASTD512, ssa.OpAMD64VPBROADCASTQ512, + ssa.OpAMD64VPMOVWB128, + ssa.OpAMD64VPMOVWB256, + ssa.OpAMD64VPMOVDB128, + ssa.OpAMD64VPMOVQB128, ssa.OpAMD64VPMOVSXBW256, ssa.OpAMD64VPMOVSXBW512, + ssa.OpAMD64VPMOVDW128, + ssa.OpAMD64VPMOVDW256, + ssa.OpAMD64VPMOVQW128, ssa.OpAMD64VPMOVSXBW128, ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, @@ -50,6 +57,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBD512, ssa.OpAMD64VPMOVSXWD256, ssa.OpAMD64VPMOVSXWD512, + ssa.OpAMD64VPMOVQD128, + ssa.OpAMD64VPMOVQD256, ssa.OpAMD64VPMOVSXBD128, 
ssa.OpAMD64VPMOVSXWD128, ssa.OpAMD64VPMOVSXBD256, @@ -715,8 +724,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, + ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked256, + ssa.OpAMD64VPMOVDBMasked128, + ssa.OpAMD64VPMOVQBMasked128, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked256, + ssa.OpAMD64VPMOVQWMasked128, ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, @@ -724,6 +740,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, ssa.OpAMD64VPMOVSXBDMasked256, @@ -1367,8 +1385,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, + ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked256, + ssa.OpAMD64VPMOVDBMasked128, + ssa.OpAMD64VPMOVQBMasked128, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked256, + ssa.OpAMD64VPMOVQWMasked128, ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, @@ -1376,6 +1401,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, ssa.OpAMD64VPMOVSXBDMasked256, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 303eec4bc0..66bb69eaf5 100644 --- 
a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -211,8 +211,23 @@ (CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) (CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) (CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) +(ConvertToInt8Int16x8 ...) => (VPMOVWB128 ...) +(ConvertToInt8Int16x16 ...) => (VPMOVWB128 ...) +(ConvertToInt8Int16x32 ...) => (VPMOVWB256 ...) +(ConvertToInt8Int32x4 ...) => (VPMOVDB128 ...) +(ConvertToInt8Int32x8 ...) => (VPMOVDB128 ...) +(ConvertToInt8Int32x16 ...) => (VPMOVDB128 ...) +(ConvertToInt8Int64x2 ...) => (VPMOVQB128 ...) +(ConvertToInt8Int64x4 ...) => (VPMOVQB128 ...) +(ConvertToInt8Int64x8 ...) => (VPMOVQB128 ...) (ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...) (ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...) +(ConvertToInt16Int32x4 ...) => (VPMOVDW128 ...) +(ConvertToInt16Int32x8 ...) => (VPMOVDW128 ...) +(ConvertToInt16Int32x16 ...) => (VPMOVDW256 ...) +(ConvertToInt16Int64x2 ...) => (VPMOVQW128 ...) +(ConvertToInt16Int64x4 ...) => (VPMOVQW128 ...) +(ConvertToInt16Int64x8 ...) => (VPMOVQW128 ...) (ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...) (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) @@ -220,6 +235,9 @@ (ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...) (ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...) (ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...) +(ConvertToInt32Int64x2 ...) => (VPMOVQD128 ...) +(ConvertToInt32Int64x4 ...) => (VPMOVQD128 ...) +(ConvertToInt32Int64x8 ...) => (VPMOVQD256 ...) (ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) (ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) (ConvertToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...) @@ -231,8 +249,23 @@ (ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) (ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) (ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) 
+(ConvertToUint8Uint16x8 ...) => (VPMOVWB128 ...) +(ConvertToUint8Uint16x16 ...) => (VPMOVWB128 ...) +(ConvertToUint8Uint16x32 ...) => (VPMOVWB256 ...) +(ConvertToUint8Uint32x4 ...) => (VPMOVDB128 ...) +(ConvertToUint8Uint32x8 ...) => (VPMOVDB128 ...) +(ConvertToUint8Uint32x16 ...) => (VPMOVDB128 ...) +(ConvertToUint8Uint64x2 ...) => (VPMOVQB128 ...) +(ConvertToUint8Uint64x4 ...) => (VPMOVQB128 ...) +(ConvertToUint8Uint64x8 ...) => (VPMOVQB128 ...) (ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) (ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) +(ConvertToUint16Uint32x4 ...) => (VPMOVDW128 ...) +(ConvertToUint16Uint32x8 ...) => (VPMOVDW128 ...) +(ConvertToUint16Uint32x16 ...) => (VPMOVDW256 ...) +(ConvertToUint16Uint64x2 ...) => (VPMOVQW128 ...) +(ConvertToUint16Uint64x4 ...) => (VPMOVQW128 ...) +(ConvertToUint16Uint64x8 ...) => (VPMOVQW128 ...) (ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...) @@ -240,6 +273,9 @@ (ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) (ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) (ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) +(ConvertToUint32Uint64x2 ...) => (VPMOVQD128 ...) +(ConvertToUint32Uint64x4 ...) => (VPMOVQD128 ...) +(ConvertToUint32Uint64x8 ...) => (VPMOVQD256 ...) (ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) (ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) (ConvertToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...) 
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index aa279a9f2a..d8094fdd8f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -548,6 +548,20 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVDB128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDW128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDW256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVQB128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQD128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQD256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVQDMasked128", 
argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVQW128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXBD512", argLength: 1, reg: w11, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -584,6 +598,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVWB128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWB256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", 
commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXBD512", argLength: 1, reg: w11, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 4baad2b312..54f21b584d 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -203,8 +203,23 @@ func simdGenericOps() []opData { {name: "CompressUint64x2", argLength: 2, commutative: false}, {name: "CompressUint64x4", argLength: 2, commutative: false}, {name: "CompressUint64x8", argLength: 2, commutative: false}, + {name: "ConvertToInt8Int16x8", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int16x16", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int16x32", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int32x4", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int32x8", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int32x16", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int64x2", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int64x4", argLength: 1, commutative: false}, + {name: "ConvertToInt8Int64x8", argLength: 1, commutative: false}, {name: "ConvertToInt16Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt16Int8x32", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int32x4", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int32x8", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int32x16", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int64x2", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int64x4", argLength: 1, commutative: false}, + {name: "ConvertToInt16Int64x8", argLength: 1, commutative: false}, {name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: 
"ConvertToInt32Float32x8", argLength: 1, commutative: false}, @@ -212,6 +227,9 @@ func simdGenericOps() []opData { {name: "ConvertToInt32Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt32Int16x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Int16x16", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int64x2", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int64x4", argLength: 1, commutative: false}, + {name: "ConvertToInt32Int64x8", argLength: 1, commutative: false}, {name: "ConvertToInt32x4Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt32x4Int16x8", argLength: 1, commutative: false}, {name: "ConvertToInt32x8Int8x16", argLength: 1, commutative: false}, @@ -223,8 +241,23 @@ func simdGenericOps() []opData { {name: "ConvertToInt64x2Int32x4", argLength: 1, commutative: false}, {name: "ConvertToInt64x4Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt64x8Int8x16", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint16x8", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint16x16", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint16x32", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint32x4", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint32x8", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint32x16", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint64x2", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint64x4", argLength: 1, commutative: false}, + {name: "ConvertToUint8Uint64x8", argLength: 1, commutative: false}, {name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint32x4", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint32x8", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint32x16", argLength: 1, commutative: false}, + {name: 
"ConvertToUint16Uint64x2", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint64x4", argLength: 1, commutative: false}, + {name: "ConvertToUint16Uint64x8", argLength: 1, commutative: false}, {name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false}, @@ -232,6 +265,9 @@ func simdGenericOps() []opData { {name: "ConvertToUint32Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint64x2", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint64x4", argLength: 1, commutative: false}, + {name: "ConvertToUint32Uint64x8", argLength: 1, commutative: false}, {name: "ConvertToUint32x4Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false}, {name: "ConvertToUint32x8Uint8x16", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index a45d01b5bb..06084d9c47 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1771,6 +1771,20 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked512 + OpAMD64VPMOVDB128 + OpAMD64VPMOVDBMasked128 + OpAMD64VPMOVDW128 + OpAMD64VPMOVDW256 + OpAMD64VPMOVDWMasked128 + OpAMD64VPMOVDWMasked256 + OpAMD64VPMOVQB128 + OpAMD64VPMOVQBMasked128 + OpAMD64VPMOVQD128 + OpAMD64VPMOVQD256 + OpAMD64VPMOVQDMasked128 + OpAMD64VPMOVQDMasked256 + OpAMD64VPMOVQW128 + OpAMD64VPMOVQWMasked128 OpAMD64VPMOVSXBD128 OpAMD64VPMOVSXBD256 OpAMD64VPMOVSXBD512 @@ -1807,6 +1821,10 @@ const ( OpAMD64VPMOVSXWQMasked128 OpAMD64VPMOVSXWQMasked256 OpAMD64VPMOVSXWQMasked512 + OpAMD64VPMOVWB128 + OpAMD64VPMOVWB256 + OpAMD64VPMOVWBMasked128 + 
OpAMD64VPMOVWBMasked256 OpAMD64VPMOVZXBD128 OpAMD64VPMOVZXBD256 OpAMD64VPMOVZXBD512 @@ -4916,8 +4934,23 @@ const ( OpCompressUint64x2 OpCompressUint64x4 OpCompressUint64x8 + OpConvertToInt8Int16x8 + OpConvertToInt8Int16x16 + OpConvertToInt8Int16x32 + OpConvertToInt8Int32x4 + OpConvertToInt8Int32x8 + OpConvertToInt8Int32x16 + OpConvertToInt8Int64x2 + OpConvertToInt8Int64x4 + OpConvertToInt8Int64x8 OpConvertToInt16Int8x16 OpConvertToInt16Int8x32 + OpConvertToInt16Int32x4 + OpConvertToInt16Int32x8 + OpConvertToInt16Int32x16 + OpConvertToInt16Int64x2 + OpConvertToInt16Int64x4 + OpConvertToInt16Int64x8 OpConvertToInt16x8Int8x16 OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 @@ -4925,6 +4958,9 @@ const ( OpConvertToInt32Int8x16 OpConvertToInt32Int16x8 OpConvertToInt32Int16x16 + OpConvertToInt32Int64x2 + OpConvertToInt32Int64x4 + OpConvertToInt32Int64x8 OpConvertToInt32x4Int8x16 OpConvertToInt32x4Int16x8 OpConvertToInt32x8Int8x16 @@ -4936,8 +4972,23 @@ const ( OpConvertToInt64x2Int32x4 OpConvertToInt64x4Int8x16 OpConvertToInt64x8Int8x16 + OpConvertToUint8Uint16x8 + OpConvertToUint8Uint16x16 + OpConvertToUint8Uint16x32 + OpConvertToUint8Uint32x4 + OpConvertToUint8Uint32x8 + OpConvertToUint8Uint32x16 + OpConvertToUint8Uint64x2 + OpConvertToUint8Uint64x4 + OpConvertToUint8Uint64x8 OpConvertToUint16Uint8x16 OpConvertToUint16Uint8x32 + OpConvertToUint16Uint32x4 + OpConvertToUint16Uint32x8 + OpConvertToUint16Uint32x16 + OpConvertToUint16Uint64x2 + OpConvertToUint16Uint64x4 + OpConvertToUint16Uint64x8 OpConvertToUint16x8Uint8x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 @@ -4945,6 +4996,9 @@ const ( OpConvertToUint32Uint8x16 OpConvertToUint32Uint16x8 OpConvertToUint32Uint16x16 + OpConvertToUint32Uint64x2 + OpConvertToUint32Uint64x4 + OpConvertToUint32Uint64x8 OpConvertToUint32x4Uint8x16 OpConvertToUint32x4Uint16x8 OpConvertToUint32x8Uint8x16 @@ -27038,6 +27092,195 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVDB128", + argLen: 1, + asm: 
x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDBMasked128", + argLen: 2, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVDW128", + argLen: 1, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDW256", + argLen: 1, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128", + argLen: 2, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVDWMasked256", + argLen: 2, + asm: x86.AVPMOVDW, + reg: 
regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVQB128", + argLen: 1, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128", + argLen: 2, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVQD128", + argLen: 1, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQD256", + argLen: 1, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQDMasked128", + argLen: 2, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + 
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVQDMasked256", + argLen: 2, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVQW128", + argLen: 1, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128", + argLen: 2, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMOVSXBD128", argLen: 1, @@ -27524,6 +27767,60 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMOVWB128", + argLen: 1, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWB256", + argLen: 1, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWBMasked128", + argLen: 2, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMOVWBMasked256", + argLen: 2, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMOVZXBD128", argLen: 1, @@ -65223,6 +65520,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ConvertToInt8Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int16x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int16x32", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int32x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int32x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int64x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt8Int64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToInt16Int8x16", argLen: 1, @@ -65233,6 +65575,36 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToInt16Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16Int32x8", + 
argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16Int32x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16Int64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16Int64x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt16Int64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToInt16x8Int8x16", argLen: 1, @@ -65268,6 +65640,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToInt32Int64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32Int64x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToInt32Int64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToInt32x4Int8x16", argLen: 1, @@ -65323,6 +65710,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint8Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint16x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint16x32", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint32x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint8Uint64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToUint16Uint8x16", argLen: 1, @@ -65333,6 +65765,36 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint16Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16Uint32x16", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint16Uint64x4", + argLen: 1, + generic: true, + }, + { + name: 
"ConvertToUint16Uint64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToUint16x8Uint8x16", argLen: 1, @@ -65368,6 +65830,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ConvertToUint32Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint32Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "ConvertToUint32Uint64x8", + argLen: 1, + generic: true, + }, { name: "ConvertToUint32x4Uint8x16", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2e17c84508..9d347b4c7d 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1370,6 +1370,24 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpConstBool(v) case OpConstNil: return rewriteValueAMD64_OpConstNil(v) + case OpConvertToInt16Int32x16: + v.Op = OpAMD64VPMOVDW256 + return true + case OpConvertToInt16Int32x4: + v.Op = OpAMD64VPMOVDW128 + return true + case OpConvertToInt16Int32x8: + v.Op = OpAMD64VPMOVDW128 + return true + case OpConvertToInt16Int64x2: + v.Op = OpAMD64VPMOVQW128 + return true + case OpConvertToInt16Int64x4: + v.Op = OpAMD64VPMOVQW128 + return true + case OpConvertToInt16Int64x8: + v.Op = OpAMD64VPMOVQW128 + return true case OpConvertToInt16Int8x16: v.Op = OpAMD64VPMOVSXBW256 return true @@ -1394,6 +1412,15 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt32Int16x8: v.Op = OpAMD64VPMOVSXWD256 return true + case OpConvertToInt32Int64x2: + v.Op = OpAMD64VPMOVQD128 + return true + case OpConvertToInt32Int64x4: + v.Op = OpAMD64VPMOVQD128 + return true + case OpConvertToInt32Int64x8: + v.Op = OpAMD64VPMOVQD256 + return true case OpConvertToInt32Int8x16: v.Op = OpAMD64VPMOVSXBD512 return true @@ -1430,6 +1457,51 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt64x8Int8x16: v.Op = OpAMD64VPMOVSXBQ512 return true + case OpConvertToInt8Int16x16: + v.Op = OpAMD64VPMOVWB128 + return 
true + case OpConvertToInt8Int16x32: + v.Op = OpAMD64VPMOVWB256 + return true + case OpConvertToInt8Int16x8: + v.Op = OpAMD64VPMOVWB128 + return true + case OpConvertToInt8Int32x16: + v.Op = OpAMD64VPMOVDB128 + return true + case OpConvertToInt8Int32x4: + v.Op = OpAMD64VPMOVDB128 + return true + case OpConvertToInt8Int32x8: + v.Op = OpAMD64VPMOVDB128 + return true + case OpConvertToInt8Int64x2: + v.Op = OpAMD64VPMOVQB128 + return true + case OpConvertToInt8Int64x4: + v.Op = OpAMD64VPMOVQB128 + return true + case OpConvertToInt8Int64x8: + v.Op = OpAMD64VPMOVQB128 + return true + case OpConvertToUint16Uint32x16: + v.Op = OpAMD64VPMOVDW256 + return true + case OpConvertToUint16Uint32x4: + v.Op = OpAMD64VPMOVDW128 + return true + case OpConvertToUint16Uint32x8: + v.Op = OpAMD64VPMOVDW128 + return true + case OpConvertToUint16Uint64x2: + v.Op = OpAMD64VPMOVQW128 + return true + case OpConvertToUint16Uint64x4: + v.Op = OpAMD64VPMOVQW128 + return true + case OpConvertToUint16Uint64x8: + v.Op = OpAMD64VPMOVQW128 + return true case OpConvertToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW256 return true @@ -1454,6 +1526,15 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint32Uint16x8: v.Op = OpAMD64VPMOVZXWD256 return true + case OpConvertToUint32Uint64x2: + v.Op = OpAMD64VPMOVQD128 + return true + case OpConvertToUint32Uint64x4: + v.Op = OpAMD64VPMOVQD128 + return true + case OpConvertToUint32Uint64x8: + v.Op = OpAMD64VPMOVQD256 + return true case OpConvertToUint32Uint8x16: v.Op = OpAMD64VPMOVZXBD512 return true @@ -1496,6 +1577,33 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint64x8Uint8x16: v.Op = OpAMD64VPMOVZXBQ512 return true + case OpConvertToUint8Uint16x16: + v.Op = OpAMD64VPMOVWB128 + return true + case OpConvertToUint8Uint16x32: + v.Op = OpAMD64VPMOVWB256 + return true + case OpConvertToUint8Uint16x8: + v.Op = OpAMD64VPMOVWB128 + return true + case OpConvertToUint8Uint32x16: + v.Op = OpAMD64VPMOVDB128 + return true + case 
OpConvertToUint8Uint32x4: + v.Op = OpAMD64VPMOVDB128 + return true + case OpConvertToUint8Uint32x8: + v.Op = OpAMD64VPMOVDB128 + return true + case OpConvertToUint8Uint64x2: + v.Op = OpAMD64VPMOVQB128 + return true + case OpConvertToUint8Uint64x4: + v.Op = OpAMD64VPMOVQB128 + return true + case OpConvertToUint8Uint64x8: + v.Op = OpAMD64VPMOVQB128 + return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 731b9afecb..a535fa0688 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -223,8 +223,23 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x32.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, 
"Int64x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) @@ -232,6 +247,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int8x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt32x8", opLen1(ssa.OpConvertToInt32x8Int8x16, types.TypeVec256), sys.AMD64) @@ -243,8 +261,23 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int32x4.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt64x4", opLen1(ssa.OpConvertToInt64x4Int8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.ConvertToInt64x8", opLen1(ssa.OpConvertToInt64x8Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x32.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.ConvertToUint16", 
opLen1(ssa.OpConvertToUint16Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) @@ -252,6 +285,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint8x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.ConvertToUint32x8", opLen1(ssa.OpConvertToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go index e438d7fa6e..2da78103a6 100644 --- a/src/simd/_gen/simdgen/godefs.go +++ b/src/simd/_gen/simdgen/godefs.go @@ -59,6 
+59,7 @@ type rawOperation struct { CPUFeature string // CPUID/Has* feature name Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. + AddDoc *string // Additional doc to be appended. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. ConstImm *string @@ -107,6 +108,9 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.NoTypes = &trueVal } } + if o.rawOperation.AddDoc != nil { + o.Documentation += "\n" + *o.rawOperation.AddDoc + } o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) diff --git a/src/simd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/_gen/simdgen/ops/Converts/categories.yaml index a2508906c3..b172d72dbf 100644 --- a/src/simd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/categories.yaml @@ -1,6 +1,10 @@ !sum # Non-truncating conversions -# int<->int or uint<->uint widening or float<->int|uint conversions. +# int<->int or uint<->uint widening, float<->int|uint conversions or truncating conversions. +- go: ConvertToInt8 + commutative: false + documentation: !string |- + // NAME converts element values to int8. - go: ConvertToInt16 commutative: false documentation: !string |- @@ -13,6 +17,10 @@ commutative: false documentation: !string |- // NAME converts element values to int64. +- go: ConvertToUint8 + commutative: false + documentation: !string |- + // NAME converts element values to uint8. - go: ConvertToUint16 commutative: false documentation: !string |- @@ -26,7 +34,7 @@ documentation: !string |- // NAME converts element values to uint64. -# Truncating conversions +# low-part only conversions # int<->int or uint<->uint widening conversions.
- go: ConvertToInt16x8 commutative: false diff --git a/src/simd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/_gen/simdgen/ops/Converts/go.yaml index 453050c323..56cb0e45df 100644 --- a/src/simd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/go.yaml @@ -22,7 +22,6 @@ go: $u base: uint elemBits: 32 - # Widening integer conversions. # uint8 -> uint16 - go: ConvertToUint16 @@ -190,8 +189,54 @@ - *i8x16 out: - *i32x16 +# Truncating conversions +- go: ConvertToInt8 + asm: "VPMOV[WDQ]B" + addDoc: &truncDoc + !string |- + // Conversion is done with truncation on the vector elements. + // Results are packed to low elements in the returned vector, its upper elements are zero-cleared. + in: + - base: int + out: + - base: int +- go: ConvertToUint8 + asm: "VPMOV[WDQ]B" + addDoc: *truncDoc + in: + - base: uint + out: + - base: uint +- go: ConvertToInt16 + asm: "VPMOV[DQ]W" + addDoc: *truncDoc + in: + - base: int + out: + - base: int +- go: ConvertToUint16 + asm: "VPMOV[DQ]W" + addDoc: *truncDoc + in: + - base: uint + out: + - base: uint +- go: ConvertToInt32 + asm: "VPMOVQD" + addDoc: *truncDoc + in: + - base: int + out: + - base: int +- go: ConvertToUint32 + asm: "VPMOVQD" + addDoc: *truncDoc + in: + - base: uint + out: + - base: uint -# Truncating conversions. +# low-part only conversions. # uint8->uint16 - go: ConvertToUint16x8 asm: "VPMOVZXBW" diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 418ae22927..2c2b55299c 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1195,6 +1195,71 @@ func (x Uint64x4) Compress(mask Mask64x4) Uint64x4 // Asm: VPCOMPRESSQ, CPU Feature: AVX512 func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 +/* ConvertToInt8 */ + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x8) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x16) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x32) ConvertToInt8() Int8x32 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x4) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x8) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x16) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x2) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x4) ConvertToInt8() Int8x16 + +// ConvertToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x8) ConvertToInt8() Int8x16 + /* ConvertToInt16 */ // ConvertToInt16 converts element values to int16. @@ -1207,6 +1272,48 @@ func (x Int8x16) ConvertToInt16() Int16x16 // Asm: VPMOVSXBW, CPU Feature: AVX512 func (x Int8x32) ConvertToInt16() Int16x32 +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x4) ConvertToInt16() Int16x8 + +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x8) ConvertToInt16() Int16x8 + +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x16) ConvertToInt16() Int16x16 + +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x2) ConvertToInt16() Int16x8 + +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x4) ConvertToInt16() Int16x8 + +// ConvertToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x8) ConvertToInt16() Int16x8 + /* ConvertToInt16x8 */ // ConvertToInt16x8 converts 8 lowest vector element values to int16. @@ -1246,6 +1353,27 @@ func (x Int16x8) ConvertToInt32() Int32x8 // Asm: VPMOVSXWD, CPU Feature: AVX512 func (x Int16x16) ConvertToInt32() Int32x16 +// ConvertToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x2) ConvertToInt32() Int32x4 + +// ConvertToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x4) ConvertToInt32() Int32x4 + +// ConvertToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x8) ConvertToInt32() Int32x8 + /* ConvertToInt32x4 */ // ConvertToInt32x4 converts 4 lowest vector element values to int32. 
@@ -1313,6 +1441,71 @@ func (x Int8x16) ConvertToInt64x4() Int64x4 // Asm: VPMOVSXBQ, CPU Feature: AVX512 func (x Int8x16) ConvertToInt64x8() Int64x8 +/* ConvertToUint8 */ + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x8) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x16) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x32) ConvertToUint8() Uint8x32 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x4) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x8) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x16) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x2) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x4) ConvertToUint8() Uint8x16 + +// ConvertToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x8) ConvertToUint8() Uint8x16 + /* ConvertToUint16 */ // ConvertToUint16 converts element values to uint16. @@ -1325,6 +1518,48 @@ func (x Uint8x16) ConvertToUint16() Uint16x16 // Asm: VPMOVZXBW, CPU Feature: AVX512 func (x Uint8x32) ConvertToUint16() Uint16x32 +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x4) ConvertToUint16() Uint16x8 + +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x8) ConvertToUint16() Uint16x8 + +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x16) ConvertToUint16() Uint16x16 + +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x2) ConvertToUint16() Uint16x8 + +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x4) ConvertToUint16() Uint16x8 + +// ConvertToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x8) ConvertToUint16() Uint16x8 + /* ConvertToUint16x8 */ // ConvertToUint16x8 converts 8 lowest vector element values to uint16. @@ -1364,6 +1599,27 @@ func (x Uint16x8) ConvertToUint32() Uint32x8 // Asm: VPMOVZXWD, CPU Feature: AVX512 func (x Uint16x16) ConvertToUint32() Uint32x16 +// ConvertToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x2) ConvertToUint32() Uint32x4 + +// ConvertToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
+// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x4) ConvertToUint32() Uint32x4 + +// ConvertToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x8) ConvertToUint32() Uint32x8 + /* ConvertToUint32x4 */ // ConvertToUint32x4 converts 4 lowest vector element values to uint32. -- 2.52.0