Cypherpunks repositories - gostls13.git/commitdiff
simd/archsimd: correct type and instruction for SaturateToUint8
author Cherry Mui <cherryyz@google.com>
Fri, 19 Dec 2025 19:48:59 +0000 (14:48 -0500)
committer Cherry Mui <cherryyz@google.com>
Fri, 19 Dec 2025 22:39:20 +0000 (14:39 -0800)
It should be defined on unsigned types, not signed types, and use
unsigned conversion instructions.

Change-Id: I49694ccdf1d331cfde88591531c358d9886e83e6
Reviewed-on: https://go-review.googlesource.com/c/go/+/731500
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
src/simd/archsimd/ops_amd64.go

index f6deba3ec15f674a04f676335a7ae6e8531d8cfe..13353c75a9855c8d866e6b702bf04841ebad85ec 100644 (file)
@@ -175,7 +175,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSQD128_128,
                ssa.OpAMD64VPMOVSQD128_256,
                ssa.OpAMD64VPMOVSQD256,
+               ssa.OpAMD64VPMOVUSWB128_128,
+               ssa.OpAMD64VPMOVUSWB128_256,
                ssa.OpAMD64VPMOVUSWB256,
+               ssa.OpAMD64VPMOVUSDB128_128,
+               ssa.OpAMD64VPMOVUSDB128_256,
+               ssa.OpAMD64VPMOVUSDB128_512,
+               ssa.OpAMD64VPMOVUSQB128_128,
+               ssa.OpAMD64VPMOVUSQB128_256,
+               ssa.OpAMD64VPMOVUSQB128_512,
                ssa.OpAMD64VPMOVUSDW128_128,
                ssa.OpAMD64VPMOVUSDW128_256,
                ssa.OpAMD64VPMOVUSDW256,
@@ -1010,7 +1018,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSQDMasked128_128,
                ssa.OpAMD64VPMOVSQDMasked128_256,
                ssa.OpAMD64VPMOVSQDMasked256,
+               ssa.OpAMD64VPMOVUSWBMasked128_128,
+               ssa.OpAMD64VPMOVUSWBMasked128_256,
                ssa.OpAMD64VPMOVUSWBMasked256,
+               ssa.OpAMD64VPMOVUSDBMasked128_128,
+               ssa.OpAMD64VPMOVUSDBMasked128_256,
+               ssa.OpAMD64VPMOVUSDBMasked128_512,
+               ssa.OpAMD64VPMOVUSQBMasked128_128,
+               ssa.OpAMD64VPMOVUSQBMasked128_256,
+               ssa.OpAMD64VPMOVUSQBMasked128_512,
                ssa.OpAMD64VPMOVUSDWMasked128_128,
                ssa.OpAMD64VPMOVUSDWMasked128_256,
                ssa.OpAMD64VPMOVUSDWMasked256,
@@ -2638,7 +2654,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSQDMasked128_128Merging,
                ssa.OpAMD64VPMOVSQDMasked128_256Merging,
                ssa.OpAMD64VPMOVSQDMasked256Merging,
+               ssa.OpAMD64VPMOVUSWBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSWBMasked128_256Merging,
                ssa.OpAMD64VPMOVUSWBMasked256Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_256Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_512Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_256Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_512Merging,
                ssa.OpAMD64VPMOVUSDWMasked128_128Merging,
                ssa.OpAMD64VPMOVUSDWMasked128_256Merging,
                ssa.OpAMD64VPMOVUSDWMasked256Merging,
@@ -3430,7 +3454,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSQDMasked128_128,
                ssa.OpAMD64VPMOVSQDMasked128_256,
                ssa.OpAMD64VPMOVSQDMasked256,
+               ssa.OpAMD64VPMOVUSWBMasked128_128,
+               ssa.OpAMD64VPMOVUSWBMasked128_256,
                ssa.OpAMD64VPMOVUSWBMasked256,
+               ssa.OpAMD64VPMOVUSDBMasked128_128,
+               ssa.OpAMD64VPMOVUSDBMasked128_256,
+               ssa.OpAMD64VPMOVUSDBMasked128_512,
+               ssa.OpAMD64VPMOVUSQBMasked128_128,
+               ssa.OpAMD64VPMOVUSQBMasked128_256,
+               ssa.OpAMD64VPMOVUSQBMasked128_512,
                ssa.OpAMD64VPACKUSDWMasked128,
                ssa.OpAMD64VPACKUSDWMasked128load,
                ssa.OpAMD64VPACKUSDWMasked256,
index 88d8567a3d045dafa43f28aea9cfe90dac07df48..39d4f9b8509cdf96ef8a54c4e75c77d4575ac645 100644 (file)
 (SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...)
 (SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...)
 (SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...)
-(SaturateToUint8Int16x8 ...) => (VPMOVSWB128_128 ...)
-(SaturateToUint8Int16x16 ...) => (VPMOVSWB128_256 ...)
-(SaturateToUint8Int32x4 ...) => (VPMOVSDB128_128 ...)
-(SaturateToUint8Int32x8 ...) => (VPMOVSDB128_256 ...)
-(SaturateToUint8Int32x16 ...) => (VPMOVSDB128_512 ...)
-(SaturateToUint8Int64x2 ...) => (VPMOVSQB128_128 ...)
-(SaturateToUint8Int64x4 ...) => (VPMOVSQB128_256 ...)
-(SaturateToUint8Int64x8 ...) => (VPMOVSQB128_512 ...)
+(SaturateToUint8Uint16x8 ...) => (VPMOVUSWB128_128 ...)
+(SaturateToUint8Uint16x16 ...) => (VPMOVUSWB128_256 ...)
 (SaturateToUint8Uint16x32 ...) => (VPMOVUSWB256 ...)
+(SaturateToUint8Uint32x4 ...) => (VPMOVUSDB128_128 ...)
+(SaturateToUint8Uint32x8 ...) => (VPMOVUSDB128_256 ...)
+(SaturateToUint8Uint32x16 ...) => (VPMOVUSDB128_512 ...)
+(SaturateToUint8Uint64x2 ...) => (VPMOVUSQB128_128 ...)
+(SaturateToUint8Uint64x4 ...) => (VPMOVUSQB128_256 ...)
+(SaturateToUint8Uint64x8 ...) => (VPMOVUSQB128_512 ...)
 (SaturateToUint16Uint32x4 ...) => (VPMOVUSDW128_128 ...)
 (SaturateToUint16Uint32x8 ...) => (VPMOVUSDW128_256 ...)
 (SaturateToUint16Uint32x16 ...) => (VPMOVUSDW256 ...)
 (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask)
 (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask)
 (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask)
+(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask)
+(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask)
 (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask)
+(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask)
+(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask)
+(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask)
 (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
 (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask)
 (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
 (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
 (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask)
 (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask)
 (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
 (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
 (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
 (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
index f38d24fde7e14b00a471a305f6ea70e75c0353c5..cd0cedc831f4e5828a1c051c25acb39cbb2a4bbf 100644 (file)
@@ -780,12 +780,24 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -798,7 +810,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -2382,15 +2398,23 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
index a68d8c4122697c75fa2766baaedc653c33deb606..36f3703bf159cce527619f6122a36c39fa2f221f 100644 (file)
@@ -842,15 +842,15 @@ func simdGenericOps() []opData {
                {name: "SaturateToInt32Int64x2", argLength: 1, commutative: false},
                {name: "SaturateToInt32Int64x4", argLength: 1, commutative: false},
                {name: "SaturateToInt32Int64x8", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int16x8", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int16x16", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int32x4", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int32x8", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int32x16", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int64x2", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int64x4", argLength: 1, commutative: false},
-               {name: "SaturateToUint8Int64x8", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint16x8", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint16x16", argLength: 1, commutative: false},
                {name: "SaturateToUint8Uint16x32", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint32x4", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint32x8", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint32x16", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint64x2", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint64x4", argLength: 1, commutative: false},
+               {name: "SaturateToUint8Uint64x8", argLength: 1, commutative: false},
                {name: "SaturateToUint16ConcatUint32x4", argLength: 2, commutative: false},
                {name: "SaturateToUint16ConcatUint32x8", argLength: 2, commutative: false},
                {name: "SaturateToUint16ConcatUint32x16", argLength: 2, commutative: false},
index 00d581ec9af41dd8fb55db8d1de892c90231b25d..71ad2c2a9a2c7a863586fa2f63dbbf4bcb8f7f18 100644 (file)
@@ -2021,12 +2021,24 @@ const (
        OpAMD64VPMOVSXWQMasked128
        OpAMD64VPMOVSXWQMasked256
        OpAMD64VPMOVSXWQMasked512
+       OpAMD64VPMOVUSDB128_128
+       OpAMD64VPMOVUSDB128_256
+       OpAMD64VPMOVUSDB128_512
+       OpAMD64VPMOVUSDBMasked128_128
+       OpAMD64VPMOVUSDBMasked128_256
+       OpAMD64VPMOVUSDBMasked128_512
        OpAMD64VPMOVUSDW128_128
        OpAMD64VPMOVUSDW128_256
        OpAMD64VPMOVUSDW256
        OpAMD64VPMOVUSDWMasked128_128
        OpAMD64VPMOVUSDWMasked128_256
        OpAMD64VPMOVUSDWMasked256
+       OpAMD64VPMOVUSQB128_128
+       OpAMD64VPMOVUSQB128_256
+       OpAMD64VPMOVUSQB128_512
+       OpAMD64VPMOVUSQBMasked128_128
+       OpAMD64VPMOVUSQBMasked128_256
+       OpAMD64VPMOVUSQBMasked128_512
        OpAMD64VPMOVUSQD128_128
        OpAMD64VPMOVUSQD128_256
        OpAMD64VPMOVUSQD256
@@ -2039,7 +2051,11 @@ const (
        OpAMD64VPMOVUSQWMasked128_128
        OpAMD64VPMOVUSQWMasked128_256
        OpAMD64VPMOVUSQWMasked128_512
+       OpAMD64VPMOVUSWB128_128
+       OpAMD64VPMOVUSWB128_256
        OpAMD64VPMOVUSWB256
+       OpAMD64VPMOVUSWBMasked128_128
+       OpAMD64VPMOVUSWBMasked128_256
        OpAMD64VPMOVUSWBMasked256
        OpAMD64VPMOVWB128_128
        OpAMD64VPMOVWB128_256
@@ -3623,15 +3639,23 @@ const (
        OpAMD64VPMOVSXWQMasked128Merging
        OpAMD64VPMOVSXWQMasked256Merging
        OpAMD64VPMOVSXWQMasked512Merging
+       OpAMD64VPMOVUSDBMasked128_128Merging
+       OpAMD64VPMOVUSDBMasked128_256Merging
+       OpAMD64VPMOVUSDBMasked128_512Merging
        OpAMD64VPMOVUSDWMasked128_128Merging
        OpAMD64VPMOVUSDWMasked128_256Merging
        OpAMD64VPMOVUSDWMasked256Merging
+       OpAMD64VPMOVUSQBMasked128_128Merging
+       OpAMD64VPMOVUSQBMasked128_256Merging
+       OpAMD64VPMOVUSQBMasked128_512Merging
        OpAMD64VPMOVUSQDMasked128_128Merging
        OpAMD64VPMOVUSQDMasked128_256Merging
        OpAMD64VPMOVUSQDMasked256Merging
        OpAMD64VPMOVUSQWMasked128_128Merging
        OpAMD64VPMOVUSQWMasked128_256Merging
        OpAMD64VPMOVUSQWMasked128_512Merging
+       OpAMD64VPMOVUSWBMasked128_128Merging
+       OpAMD64VPMOVUSWBMasked128_256Merging
        OpAMD64VPMOVUSWBMasked256Merging
        OpAMD64VPMOVWBMasked128_128Merging
        OpAMD64VPMOVWBMasked128_256Merging
@@ -6992,15 +7016,15 @@ const (
        OpSaturateToInt32Int64x2
        OpSaturateToInt32Int64x4
        OpSaturateToInt32Int64x8
-       OpSaturateToUint8Int16x8
-       OpSaturateToUint8Int16x16
-       OpSaturateToUint8Int32x4
-       OpSaturateToUint8Int32x8
-       OpSaturateToUint8Int32x16
-       OpSaturateToUint8Int64x2
-       OpSaturateToUint8Int64x4
-       OpSaturateToUint8Int64x8
+       OpSaturateToUint8Uint16x8
+       OpSaturateToUint8Uint16x16
        OpSaturateToUint8Uint16x32
+       OpSaturateToUint8Uint32x4
+       OpSaturateToUint8Uint32x8
+       OpSaturateToUint8Uint32x16
+       OpSaturateToUint8Uint64x2
+       OpSaturateToUint8Uint64x4
+       OpSaturateToUint8Uint64x8
        OpSaturateToUint16ConcatUint32x4
        OpSaturateToUint16ConcatUint32x8
        OpSaturateToUint16ConcatUint32x16
@@ -32103,6 +32127,87 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VPMOVUSDB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDBMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VPMOVUSDW128_128",
                argLen: 1,
@@ -32184,6 +32289,87 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VPMOVUSQB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQBMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VPMOVUSQD128_128",
                argLen: 1,
@@ -32346,6 +32532,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VPMOVUSWB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSWB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VPMOVUSWB256",
                argLen: 1,
@@ -32359,6 +32571,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VPMOVUSWBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSWBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VPMOVUSWBMasked256",
                argLen: 2,
@@ -57268,6 +57508,54 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "VPMOVUSDBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSDBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSDBMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:         "VPMOVUSDWMasked128_128Merging",
                argLen:       3,
@@ -57316,6 +57604,54 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "VPMOVUSQBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQBMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:         "VPMOVUSQDMasked128_128Merging",
                argLen:       3,
@@ -57412,6 +57748,38 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "VPMOVUSWBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSWBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:         "VPMOVUSWBMasked256Merging",
                argLen:       3,
@@ -93430,47 +93798,47 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int16x8",
+               name:    "SaturateToUint8Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int16x16",
+               name:    "SaturateToUint8Uint16x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int32x4",
+               name:    "SaturateToUint8Uint16x32",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int32x8",
+               name:    "SaturateToUint8Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int32x16",
+               name:    "SaturateToUint8Uint32x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int64x2",
+               name:    "SaturateToUint8Uint32x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int64x4",
+               name:    "SaturateToUint8Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Int64x8",
+               name:    "SaturateToUint8Uint64x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "SaturateToUint8Uint16x32",
+               name:    "SaturateToUint8Uint64x8",
                argLen:  1,
                generic: true,
        },
index 797757c322f103ed6eec3cd9a1ad19e0985abbab..9efc566c4821370bb8a71adc9192cfb601709b11 100644 (file)
@@ -5139,32 +5139,32 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSaturateToUint32Uint64x8:
                v.Op = OpAMD64VPMOVUSQD256
                return true
-       case OpSaturateToUint8Int16x16:
-               v.Op = OpAMD64VPMOVSWB128_256
+       case OpSaturateToUint8Uint16x16:
+               v.Op = OpAMD64VPMOVUSWB128_256
                return true
-       case OpSaturateToUint8Int16x8:
-               v.Op = OpAMD64VPMOVSWB128_128
+       case OpSaturateToUint8Uint16x32:
+               v.Op = OpAMD64VPMOVUSWB256
                return true
-       case OpSaturateToUint8Int32x16:
-               v.Op = OpAMD64VPMOVSDB128_512
+       case OpSaturateToUint8Uint16x8:
+               v.Op = OpAMD64VPMOVUSWB128_128
                return true
-       case OpSaturateToUint8Int32x4:
-               v.Op = OpAMD64VPMOVSDB128_128
+       case OpSaturateToUint8Uint32x16:
+               v.Op = OpAMD64VPMOVUSDB128_512
                return true
-       case OpSaturateToUint8Int32x8:
-               v.Op = OpAMD64VPMOVSDB128_256
+       case OpSaturateToUint8Uint32x4:
+               v.Op = OpAMD64VPMOVUSDB128_128
                return true
-       case OpSaturateToUint8Int64x2:
-               v.Op = OpAMD64VPMOVSQB128_128
+       case OpSaturateToUint8Uint32x8:
+               v.Op = OpAMD64VPMOVUSDB128_256
                return true
-       case OpSaturateToUint8Int64x4:
-               v.Op = OpAMD64VPMOVSQB128_256
+       case OpSaturateToUint8Uint64x2:
+               v.Op = OpAMD64VPMOVUSQB128_128
                return true
-       case OpSaturateToUint8Int64x8:
-               v.Op = OpAMD64VPMOVSQB128_512
+       case OpSaturateToUint8Uint64x4:
+               v.Op = OpAMD64VPMOVUSQB128_256
                return true
-       case OpSaturateToUint8Uint16x32:
-               v.Op = OpAMD64VPMOVUSWB256
+       case OpSaturateToUint8Uint64x8:
+               v.Op = OpAMD64VPMOVUSQB128_512
                return true
        case OpScaleFloat32x16:
                v.Op = OpAMD64VSCALEFPS512
@@ -33775,6 +33775,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask)
+       // result: (VPMOVUSWBMasked128_128 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSWB128_128 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSWBMasked128_128)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask)
        // result: (VPSHLDWMasked128 [a] x y mask)
        for {
@@ -34327,6 +34339,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask)
+       // result: (VPMOVUSWBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSWB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSWBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask)
        // result: (VPMOVUSWBMasked256 x mask)
        for {
@@ -35607,6 +35631,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask)
+       // result: (VPMOVUSDBMasked128_128 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDB128_128 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDBMasked128_128)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask)
        // result: (VPACKUSDWMasked128 x y mask)
        for {
@@ -36480,6 +36516,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask)
+       // result: (VPMOVUSDBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask)
        // result: (VPACKUSDWMasked256 x y mask)
        for {
@@ -37416,6 +37464,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
                v.AddArg3(x, y, mask)
                return true
        }
+       // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask)
+       // result: (VPMOVUSDBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask)
        // result: (VPACKUSDWMasked512 x y mask)
        for {
@@ -38259,6 +38319,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask)
+       // result: (VPMOVUSQBMasked128_128 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQB128_128 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQBMasked128_128)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask)
        // result: (VPMOVUSQWMasked128_128 x mask)
        for {
@@ -39100,6 +39172,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask)
+       // result: (VPMOVUSQBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask)
        // result: (VPMOVUSQWMasked128_256 x mask)
        for {
@@ -39920,6 +40004,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask)
+       // result: (VPMOVUSQBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask)
        // result: (VPMOVUSQWMasked128_512 x mask)
        for {
@@ -44109,6 +44205,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
+       // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask)
+       // result: (VPMOVUSDBMasked128_512Merging dst x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSDB128_512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPMOVUSDBMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
        // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask)
        // result: (VPMOVUSDWMasked256Merging dst x mask)
        for {
@@ -44869,6 +44978,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
+       // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask)
+       // result: (VPMOVUSQBMasked128_512Merging dst x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSQB128_512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPMOVUSQBMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
        // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask)
        // result: (VPMOVUSQDMasked256Merging dst x mask)
        for {
@@ -47797,6 +47919,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSDB128_128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSDBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
@@ -47816,6 +47957,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSQB128_128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSQBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
@@ -47854,6 +48014,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSWB128_128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSWBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
@@ -50990,6 +51169,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSDB128_256 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSDBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
@@ -51009,6 +51207,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSQB128_256 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSQBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
@@ -51047,6 +51264,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
+       // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMOVUSWB128_256 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPMOVUSWBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
        // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
        // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
index 7eb54569944d57d5bd4ad0c0c9876d44e3769b6f..22cf50d491c9759c3b0f5a7269341160861ce548 100644 (file)
@@ -931,15 +931,15 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int64x2.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x8.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x32.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x16.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x16, types.TypeVec256), sys.AMD64)
index 64cd4cb24e1eb6d997b12150fd2bb99fcdef7387..1d688b434d1e6d04a6943a05b705aad198e74617 100644 (file)
       bits: 128
 - go: SaturateToUint8
   regexpTag: "convert"
-  asm: "VPMOVS[WDQ]B"
+  asm: "VPMOVUS[WDQ]B"
   in:
-    - base: int
+    - base: uint
   out:
-    - base: int
+    - base: uint
       bits: 128
 - go: SaturateToInt8
   regexpTag: "convert"
index 8e32533aec1444e044f61dc8275fedc85b23f9db..304c0c07967fc7eaa6ad7817952f015d409f7c4a 100644 (file)
@@ -5464,53 +5464,53 @@ func (x Int64x8) SaturateToInt32() Int32x8
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
 // Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSWB, CPU Feature: AVX512
-func (x Int16x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x8) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
 //
-// Asm: VPMOVSWB, CPU Feature: AVX512
-func (x Int16x16) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x16) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x4) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x32) SaturateToUint8() Uint8x32
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
 // Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x4) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x16) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x8) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x2) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x16) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
 // Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x4) SaturateToUint8() Int8x16
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x2) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
 // Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x4) SaturateToUint8() Uint8x16
 
 // SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
 //
-// Asm: VPMOVUSWB, CPU Feature: AVX512
-func (x Uint16x32) SaturateToUint8() Uint8x32
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x8) SaturateToUint8() Uint8x16
 
 /* SaturateToUint16 */