Cypherpunks repositories - gostls13.git/commitdiff
simd/archsimd: delete DotProductQuadruple methods for now
author Cherry Mui <cherryyz@google.com>
Fri, 19 Dec 2025 22:05:03 +0000 (17:05 -0500)
committer Cherry Mui <cherryyz@google.com>
Fri, 19 Dec 2025 22:39:35 +0000 (14:39 -0800)
The DotProductQuadruple methods are currently defined on Int8
vectors. There are some problems with that.

1. We defined a DotProductQuadrupleSaturated method, but the dot
product part does not need saturation, as it cannot overflow. It
is the addition part of VPDPBUSDS that does the saturation.
Currently we have optimization rules like

x.DotProductQuadrupleSaturated(y).Add(z) -> VPDPBUSDS

which is incorrect, in that the dot product doesn't do (or need)
saturation, and the Add is a regular Add, but we rewrite it to a
saturated add. The correct rule should be something like

x.DotProductQuadruple(y).AddSaturated(z) -> VPDPBUSDS

2. There are multiple flavors of DotProductQuadruple:
signed/unsigned × signed/unsigned, which cannot be completely
disambiguated by the type. The current naming may preclude adding
all the flavors.

For these reasons, remove the methods for now. We can add them
later with the issues addressed.

Change-Id: I549c0925afaa68c7e2cc956105619f2c1b46b325
Reviewed-on: https://go-review.googlesource.com/c/go/+/731441
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
12 files changed:
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml
src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml
src/simd/archsimd/internal/simd_test/simd_test.go
src/simd/archsimd/ops_amd64.go

index 454dbb308090a8fbef637689a1d62f1789173fbd..e3fc2fb380b4330ca7492efbe47d8b9374c4ff13 100644 (file)
@@ -1324,12 +1324,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPERMI2Q256,
                ssa.OpAMD64VPERMI2PD512,
                ssa.OpAMD64VPERMI2Q512,
-               ssa.OpAMD64VPDPBUSD128,
-               ssa.OpAMD64VPDPBUSD256,
-               ssa.OpAMD64VPDPBUSD512,
-               ssa.OpAMD64VPDPBUSDS128,
-               ssa.OpAMD64VPDPBUSDS256,
-               ssa.OpAMD64VPDPBUSDS512,
                ssa.OpAMD64VFMADD213PS128,
                ssa.OpAMD64VFMADD213PS256,
                ssa.OpAMD64VFMADD213PS512,
@@ -1446,12 +1440,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMADDUBSWMasked128Merging,
                ssa.OpAMD64VPMADDUBSWMasked256Merging,
                ssa.OpAMD64VPMADDUBSWMasked512Merging,
-               ssa.OpAMD64VPDPBUSDMasked128,
-               ssa.OpAMD64VPDPBUSDMasked256,
-               ssa.OpAMD64VPDPBUSDMasked512,
-               ssa.OpAMD64VPDPBUSDSMasked128,
-               ssa.OpAMD64VPDPBUSDSMasked256,
-               ssa.OpAMD64VPDPBUSDSMasked512,
                ssa.OpAMD64VGF2P8MULBMasked128Merging,
                ssa.OpAMD64VGF2P8MULBMasked256Merging,
                ssa.OpAMD64VGF2P8MULBMasked512Merging,
@@ -1971,8 +1959,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPERMI2Q256load,
                ssa.OpAMD64VPERMI2PD512load,
                ssa.OpAMD64VPERMI2Q512load,
-               ssa.OpAMD64VPDPBUSD512load,
-               ssa.OpAMD64VPDPBUSDS512load,
                ssa.OpAMD64VFMADD213PS128load,
                ssa.OpAMD64VFMADD213PS256load,
                ssa.OpAMD64VFMADD213PS512load,
@@ -2020,12 +2006,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPERMI2QMasked256load,
                ssa.OpAMD64VPERMI2PDMasked512load,
                ssa.OpAMD64VPERMI2QMasked512load,
-               ssa.OpAMD64VPDPBUSDMasked128load,
-               ssa.OpAMD64VPDPBUSDMasked256load,
-               ssa.OpAMD64VPDPBUSDMasked512load,
-               ssa.OpAMD64VPDPBUSDSMasked128load,
-               ssa.OpAMD64VPDPBUSDSMasked256load,
-               ssa.OpAMD64VPDPBUSDSMasked512load,
                ssa.OpAMD64VFMADD213PSMasked128load,
                ssa.OpAMD64VFMADD213PSMasked256load,
                ssa.OpAMD64VFMADD213PSMasked512load,
@@ -3045,18 +3025,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMADDUBSWMasked128,
                ssa.OpAMD64VPMADDUBSWMasked256,
                ssa.OpAMD64VPMADDUBSWMasked512,
-               ssa.OpAMD64VPDPBUSDMasked128,
-               ssa.OpAMD64VPDPBUSDMasked128load,
-               ssa.OpAMD64VPDPBUSDMasked256,
-               ssa.OpAMD64VPDPBUSDMasked256load,
-               ssa.OpAMD64VPDPBUSDMasked512,
-               ssa.OpAMD64VPDPBUSDMasked512load,
-               ssa.OpAMD64VPDPBUSDSMasked128,
-               ssa.OpAMD64VPDPBUSDSMasked128load,
-               ssa.OpAMD64VPDPBUSDSMasked256,
-               ssa.OpAMD64VPDPBUSDSMasked256load,
-               ssa.OpAMD64VPDPBUSDSMasked512,
-               ssa.OpAMD64VPDPBUSDSMasked512load,
                ssa.OpAMD64VEXPANDPSMasked128,
                ssa.OpAMD64VEXPANDPSMasked256,
                ssa.OpAMD64VEXPANDPSMasked512,
index 353d2721792c63f2a23b4b3e5feabbb1d58fffc9..38ca44f7eb092d09b98d5d12924e44e7859985fd 100644 (file)
 (EQ (VPTEST x:(VPAND(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (EQ (VPTEST j k) yes no)
 (EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
 (EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
-
-// DotProductQuadruple optimizations
-(VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z) => (VPDPBUSD128 <t> z x y)
-(VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z) => (VPDPBUSD256 <t> z x y)
-(VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z) => (VPDPBUSD512 <t> z x y)
-(VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z) => (VPDPBUSDS128 <t> z x y)
-(VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z) => (VPDPBUSDS256 <t> z x y)
-(VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z) => (VPDPBUSDS512 <t> z x y)
\ No newline at end of file
index 6b1cac322cdaedf118f325a3e03618928006d0df..793cf5c97f133faa045376e921460bffb4aa8848 100644 (file)
 (DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
 (DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
 (DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
-(DotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
-(DotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
-(DotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(DotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
-(DotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
-(DotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
 (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
 (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
 (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
 (VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) => (VPMADDUBSWMasked128 x y mask)
 (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) => (VPMADDUBSWMasked256 x y mask)
 (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512 x y mask)
-(VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) => (VPDPBUSDMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) => (VPDPBUSDMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask)
-(VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask)
 (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask)
 (VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask)
 (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask)
 (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
 (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
 (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
-(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
-(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
-(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem)
-(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
 (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
 (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
 (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
index cd0cedc831f4e5828a1c051c25acb39cbb2a4bbf..c81e6b78698e50ac5383c34dc18d9326961165b3 100644 (file)
@@ -452,18 +452,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
-               {name: "VPDPBUSD128", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPBUSD256", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPBUSD512", argLength: 3, reg: w31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPDPBUSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPBUSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPBUSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
                {name: "VPDPWSSD128", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPDPWSSD256", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPDPWSSD512", argLength: 3, reg: w31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
@@ -1714,14 +1702,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
                {name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
                {name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
-               {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
index 07878e2c69356988f9a0f7c0b71d470708c6fd6f..8afa2bf25925a6fdf21130446472ba96bdc12970 100644 (file)
@@ -304,12 +304,6 @@ func simdGenericOps() []opData {
                {name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false},
                {name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false},
                {name: "DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false},
-               {name: "DotProductQuadrupleInt32x4", argLength: 3, commutative: false},
-               {name: "DotProductQuadrupleInt32x8", argLength: 3, commutative: false},
-               {name: "DotProductQuadrupleInt32x16", argLength: 3, commutative: false},
-               {name: "DotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
-               {name: "DotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
-               {name: "DotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
                {name: "EqualFloat32x4", argLength: 2, commutative: true},
                {name: "EqualFloat32x8", argLength: 2, commutative: true},
                {name: "EqualFloat32x16", argLength: 2, commutative: true},
index ab7ca8de0dea89c7f5f4881f61da0bc83943beb6..9ba57675965090e626fff0d4a1d8464e0890e75d 100644 (file)
@@ -1693,18 +1693,6 @@ const (
        OpAMD64VPCOMPRESSWMasked128
        OpAMD64VPCOMPRESSWMasked256
        OpAMD64VPCOMPRESSWMasked512
-       OpAMD64VPDPBUSD128
-       OpAMD64VPDPBUSD256
-       OpAMD64VPDPBUSD512
-       OpAMD64VPDPBUSDMasked128
-       OpAMD64VPDPBUSDMasked256
-       OpAMD64VPDPBUSDMasked512
-       OpAMD64VPDPBUSDS128
-       OpAMD64VPDPBUSDS256
-       OpAMD64VPDPBUSDS512
-       OpAMD64VPDPBUSDSMasked128
-       OpAMD64VPDPBUSDSMasked256
-       OpAMD64VPDPBUSDSMasked512
        OpAMD64VPDPWSSD128
        OpAMD64VPDPWSSD256
        OpAMD64VPDPWSSD512
@@ -2955,14 +2943,6 @@ const (
        OpAMD64VPCMPEQQ512load
        OpAMD64VPCMPGTD512load
        OpAMD64VPCMPGTQ512load
-       OpAMD64VPDPBUSD512load
-       OpAMD64VPDPBUSDMasked128load
-       OpAMD64VPDPBUSDMasked256load
-       OpAMD64VPDPBUSDMasked512load
-       OpAMD64VPDPBUSDS512load
-       OpAMD64VPDPBUSDSMasked128load
-       OpAMD64VPDPBUSDSMasked256load
-       OpAMD64VPDPBUSDSMasked512load
        OpAMD64VPDPWSSD512load
        OpAMD64VPDPWSSDMasked128load
        OpAMD64VPDPWSSDMasked256load
@@ -6478,12 +6458,6 @@ const (
        OpDotProductPairsSaturatedUint8x16
        OpDotProductPairsSaturatedUint8x32
        OpDotProductPairsSaturatedUint8x64
-       OpDotProductQuadrupleInt32x4
-       OpDotProductQuadrupleInt32x8
-       OpDotProductQuadrupleInt32x16
-       OpDotProductQuadrupleSaturatedInt32x4
-       OpDotProductQuadrupleSaturatedInt32x8
-       OpDotProductQuadrupleSaturatedInt32x16
        OpEqualFloat32x4
        OpEqualFloat32x8
        OpEqualFloat32x16
@@ -27293,204 +27267,6 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
-       {
-               name:         "VPDPBUSD128",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSD256",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSD512",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked128",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked256",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked512",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDS128",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDS256",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDS512",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked128",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked256",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked512",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
        {
                name:         "VPDPWSSD128",
                argLen:       3,
@@ -46191,156 +45967,6 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
-       {
-               name:         "VPDPBUSD512load",
-               auxType:      auxSymOff,
-               argLen:       4,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked128load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked256load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDMasked512load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDS512load",
-               auxType:      auxSymOff,
-               argLen:       4,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked128load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked256load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPBUSDSMasked512load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPBUSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
        {
                name:         "VPDPWSSD512load",
                auxType:      auxSymOff,
@@ -90939,36 +90565,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "DotProductQuadrupleInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProductQuadrupleInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProductQuadrupleInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProductQuadrupleSaturatedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProductQuadrupleSaturatedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProductQuadrupleSaturatedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:        "EqualFloat32x4",
                argLen:      2,
index a0f4f6a704dc9e1216d5d5f12f2afc928715d72d..35e9516f614774ebb18baebefa3b148e4bd2decc 100644 (file)
@@ -1006,10 +1006,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v)
        case OpAMD64VPACKUSDWMasked512:
                return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v)
-       case OpAMD64VPADDD128:
-               return rewriteValueAMD64_OpAMD64VPADDD128(v)
-       case OpAMD64VPADDD256:
-               return rewriteValueAMD64_OpAMD64VPADDD256(v)
        case OpAMD64VPADDD512:
                return rewriteValueAMD64_OpAMD64VPADDD512(v)
        case OpAMD64VPADDDMasked128:
@@ -1126,22 +1122,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v)
        case OpAMD64VPCMPUQMasked512:
                return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v)
-       case OpAMD64VPDPBUSD512:
-               return rewriteValueAMD64_OpAMD64VPDPBUSD512(v)
-       case OpAMD64VPDPBUSDMasked128:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v)
-       case OpAMD64VPDPBUSDMasked256:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v)
-       case OpAMD64VPDPBUSDMasked512:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v)
-       case OpAMD64VPDPBUSDS512:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v)
-       case OpAMD64VPDPBUSDSMasked128:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v)
-       case OpAMD64VPDPBUSDSMasked256:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v)
-       case OpAMD64VPDPBUSDSMasked512:
-               return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v)
        case OpAMD64VPDPWSSD512:
                return rewriteValueAMD64_OpAMD64VPDPWSSD512(v)
        case OpAMD64VPDPWSSDMasked128:
@@ -3142,24 +3122,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDotProductPairsSaturatedUint8x64:
                v.Op = OpAMD64VPMADDUBSW512
                return true
-       case OpDotProductQuadrupleInt32x16:
-               v.Op = OpAMD64VPDPBUSD512
-               return true
-       case OpDotProductQuadrupleInt32x4:
-               v.Op = OpAMD64VPDPBUSD128
-               return true
-       case OpDotProductQuadrupleInt32x8:
-               v.Op = OpAMD64VPDPBUSD256
-               return true
-       case OpDotProductQuadrupleSaturatedInt32x16:
-               v.Op = OpAMD64VPDPBUSDS512
-               return true
-       case OpDotProductQuadrupleSaturatedInt32x4:
-               v.Op = OpAMD64VPDPBUSDS128
-               return true
-       case OpDotProductQuadrupleSaturatedInt32x8:
-               v.Op = OpAMD64VPDPBUSDS256
-               return true
        case OpEq16:
                return rewriteValueAMD64_OpEq16(v)
        case OpEq32:
@@ -35318,34 +35280,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
                v.AddArg3(x, y, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask)
-       // result: (VPDPBUSDMasked128 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSD128 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDMasked128)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
-       // match: (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask)
-       // result: (VPDPBUSDSMasked128 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSDS128 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDSMasked128)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
        // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask)
        // result: (VPMOVSXDQMasked128 x mask)
        for {
@@ -36165,34 +36099,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
                v.AddArg3(x, y, mask)
                return true
        }
-       // match: (VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask)
-       // result: (VPDPBUSDMasked256 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSD256 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDMasked256)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
-       // match: (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask)
-       // result: (VPDPBUSDSMasked256 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSDS256 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDSMasked256)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
        // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask)
        // result: (VPMOVSXDQMasked256 x mask)
        for {
@@ -37100,34 +37006,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
                v.AddArg3(x, y, mask)
                return true
        }
-       // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask)
-       // result: (VPDPBUSDMasked512 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSD512 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDMasked512)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
-       // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask)
-       // result: (VPDPBUSDSMasked512 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPBUSDS512 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPBUSDSMasked512)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
        // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask)
        // result: (VPMOVSXDQMasked512 x mask)
        for {
@@ -42503,151 +42381,9 @@ func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z)
-       // result: (VPDPBUSD128 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSD128 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero128 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSD128)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
-       // match: (VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z)
-       // result: (VPDPBUSDS128 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSDS128 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero128 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSDS128)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z)
-       // result: (VPDPBUSD256 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSD256 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero256 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSD256)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
-       // match: (VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z)
-       // result: (VPDPBUSDS256 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSDS256 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero256 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSDS256)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z)
-       // result: (VPDPBUSD512 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSD512 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero512 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSD512)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
-       // match: (VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z)
-       // result: (VPDPBUSDS512 <t> z x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64VPDPBUSDS512 {
-                               continue
-                       }
-                       y := v_0.Args[2]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64Zero512 {
-                               continue
-                       }
-                       t := v_0_0.Type
-                       x := v_0.Args[1]
-                       z := v_1
-                       v.reset(OpAMD64VPDPBUSDS512)
-                       v.Type = t
-                       v.AddArg3(z, x, y)
-                       return true
-               }
-               break
-       }
        // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
        // cond: canMergeLoad(v, l) && clobber(l)
        // result: (VPADDD512load {sym} [off] x ptr mem)
@@ -53370,250 +53106,6 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSD512load {sym} [off] x y ptr mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSD512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg4(x, y, ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload128 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDMasked128load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload256 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDMasked256load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDMasked512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDS512load {sym} [off] x y ptr mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDS512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg4(x, y, ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload128 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDSMasked128load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload256 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDSMasked256load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPBUSDSMasked512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
index e1d7ac796dea058885ee07c6c5caa77b78c45ddd..6769122aa4d661a3475ff76eca087794f93c6181 100644 (file)
@@ -328,12 +328,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
index 2b1da7adaf953f86b40d35f2a06a1ede011bae82..54a8ece57443c37024540b3776f2b7b5725c991e 100644 (file)
   documentation: !string |-
     // NAME multiplies the elements and add the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
-# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
 # - go: DotProductBroadcast
 #   commutative: true
 # #   documentation: !string |-
 #     // NAME multiplies all elements and broadcasts the sum.
-- go: DotProductQuadruple
-  commutative: false
-  documentation: !string |-
-    // NAME performs dot products on groups of 4 elements of x and y.
-    // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-- go: DotProductQuadrupleSaturated
-  commutative: false
-  documentation: !string |-
-    // NAME multiplies performs dot products on groups of 4 elements of x and y.
-    // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
 - go: AddDotProductPairs
   commutative: false
   noTypes: "true"
index 4a1195b52d05537ac325f923b6315da1d1e04f26..18ce8a53b20281d798fcbb3c4dfa58c6dec1d16c 100644 (file)
 #     const: 127
 #   out:
 #   - *dpb_src
-- go: DotProductQuadruple
-  asm: "VPDPBUSD"
-  operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
-  in:
-  - &qdpa_acc
-    go: $t_acc
-    base: int
-    elemBits: 32
-  - &qdpa_src1
-    go: $t_src1
-    base: uint
-    overwriteElementBits: 8
-  - &qdpa_src2
-    go: $t_src2
-    base: int
-    overwriteElementBits: 8
-  out:
-  - *qdpa_acc
-- go: DotProductQuadrupleSaturated
-  asm: "VPDPBUSDS"
-  operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
-  in:
-  - *qdpa_acc
-  - *qdpa_src1
-  - *qdpa_src2
-  out:
-  - *qdpa_acc
 - go: AddDotProductPairs
   asm: "VPDPWSSD"
   in:
index 5fd7407dbee0c0a566ef7e24dab85b9de464dd14..21c4d87dec423f1033d26911ab348020a35935c9 100644 (file)
@@ -1123,44 +1123,6 @@ func TestMaskedMerge(t *testing.T) {
        }
 }
 
-func TestDotProductQuadruple(t *testing.T) {
-       if !archsimd.X86.AVXVNNI() {
-               t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
-               return
-       }
-       xd := make([]int8, 16)
-       yd := make([]uint8, 16)
-       zd := make([]int32, 4)
-       wanted1 := make([]int32, 4)
-       wanted2 := make([]int32, 4)
-       res1 := make([]int32, 4)
-       res2 := make([]int32, 4)
-       for i := range 16 {
-               xd[i] = int8(i + 112)  // 112+15 = 127
-               yd[i] = uint8(i + 240) // 240+15 = 255
-       }
-       for i := range 4 {
-               i4 := 4 * i
-               wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3])
-               zd[i] = int32(i + 1)
-               wanted2[i] = wanted1[i] + zd[i]
-       }
-
-       x := archsimd.LoadInt8x16Slice(xd)
-       y := archsimd.LoadUint8x16Slice(yd)
-       z := archsimd.LoadInt32x4Slice(zd)
-       x.DotProductQuadruple(y).StoreSlice(res1)
-       x.DotProductQuadruple(y).Add(z).StoreSlice(res2)
-       for i := range 4 {
-               if res1[i] != wanted1[i] {
-                       t.Errorf("got %d wanted %d", res1[i], wanted1[i])
-               }
-               if res2[i] != wanted2[i] {
-                       t.Errorf("got %d wanted %d", res2[i], wanted2[i])
-               }
-       }
-}
-
 func TestPermuteScalars(t *testing.T) {
        x := []int32{11, 12, 13, 14}
        want := []int32{12, 13, 14, 11}
index 2a8a6bd4c616c0a74db94c701dce0a80431c378c..6f904f1cbc04d70893a76f29a91b58abe51110f3 100644 (file)
@@ -1980,46 +1980,6 @@ func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16
 // Asm: VPMADDUBSW, CPU Feature: AVX512
 func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
 
-/* DotProductQuadruple */
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16
-
-/* DotProductQuadrupleSaturated */
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
-
 /* Equal */
 
 // Equal returns a mask whose elements indicate whether x == y.