From 70c22e0ad7d89504ab26fb157864f61a79cd4d47 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Fri, 19 Dec 2025 17:05:03 -0500 Subject: [PATCH] simd/archsimd: delete DotProductQuadruple methods for now MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The DotProductQuadruple methods are currently defined on Int8 vectors. There are some problems with that. 1. We defined a DotProductQuadrupleSaturated method, but the dot product part does not need saturation, as it cannot overflow. It is the addition part of VPDPBUSDS that does the saturation. Currently we have optimization rules like x.DotProductQuadrupleSaturated(y).Add(z) -> VPDPBUSDS which is incorrect, in that the dot product doesn't do (or need) saturation, and the Add is a regular Add, but we rewrite it to a saturated add. The correct rule should be something like x.DotProductQuadruple(y).AddSaturated(z) -> VPDPBUSDS 2. There are multiple flavors of DotProductQuadruple: signed/unsigned × signed/unsigned, which cannot be completely disambiguated by the type. The current naming may preclude adding all the flavors. For these reasons, remove the methods for now. We can add them later with the issues addressed. 
Change-Id: I549c0925afaa68c7e2cc956105619f2c1b46b325 Reviewed-on: https://go-review.googlesource.com/c/go/+/731441 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 32 -- src/cmd/compile/internal/ssa/_gen/AMD64.rules | 8 - .../compile/internal/ssa/_gen/simdAMD64.rules | 20 - .../compile/internal/ssa/_gen/simdAMD64ops.go | 20 - .../internal/ssa/_gen/simdgenericOps.go | 6 - src/cmd/compile/internal/ssa/opGen.go | 404 -------------- src/cmd/compile/internal/ssa/rewriteAMD64.go | 508 ------------------ .../compile/internal/ssagen/simdintrinsics.go | 6 - .../_gen/simdgen/ops/MLOps/categories.yaml | 11 - .../archsimd/_gen/simdgen/ops/MLOps/go.yaml | 27 - .../archsimd/internal/simd_test/simd_test.go | 38 -- src/simd/archsimd/ops_amd64.go | 40 -- 12 files changed, 1120 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 454dbb3080..e3fc2fb380 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -1324,12 +1324,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2Q256, ssa.OpAMD64VPERMI2PD512, ssa.OpAMD64VPERMI2Q512, - ssa.OpAMD64VPDPBUSD128, - ssa.OpAMD64VPDPBUSD256, - ssa.OpAMD64VPDPBUSD512, - ssa.OpAMD64VPDPBUSDS128, - ssa.OpAMD64VPDPBUSDS256, - ssa.OpAMD64VPDPBUSDS512, ssa.OpAMD64VFMADD213PS128, ssa.OpAMD64VFMADD213PS256, ssa.OpAMD64VFMADD213PS512, @@ -1446,12 +1440,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128Merging, ssa.OpAMD64VPMADDUBSWMasked256Merging, ssa.OpAMD64VPMADDUBSWMasked512Merging, - ssa.OpAMD64VPDPBUSDMasked128, - ssa.OpAMD64VPDPBUSDMasked256, - ssa.OpAMD64VPDPBUSDMasked512, - ssa.OpAMD64VPDPBUSDSMasked128, - ssa.OpAMD64VPDPBUSDSMasked256, - ssa.OpAMD64VPDPBUSDSMasked512, ssa.OpAMD64VGF2P8MULBMasked128Merging, ssa.OpAMD64VGF2P8MULBMasked256Merging, ssa.OpAMD64VGF2P8MULBMasked512Merging, @@ -1971,8 +1959,6 @@ func 
ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2Q256load, ssa.OpAMD64VPERMI2PD512load, ssa.OpAMD64VPERMI2Q512load, - ssa.OpAMD64VPDPBUSD512load, - ssa.OpAMD64VPDPBUSDS512load, ssa.OpAMD64VFMADD213PS128load, ssa.OpAMD64VFMADD213PS256load, ssa.OpAMD64VFMADD213PS512load, @@ -2020,12 +2006,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2QMasked256load, ssa.OpAMD64VPERMI2PDMasked512load, ssa.OpAMD64VPERMI2QMasked512load, - ssa.OpAMD64VPDPBUSDMasked128load, - ssa.OpAMD64VPDPBUSDMasked256load, - ssa.OpAMD64VPDPBUSDMasked512load, - ssa.OpAMD64VPDPBUSDSMasked128load, - ssa.OpAMD64VPDPBUSDSMasked256load, - ssa.OpAMD64VPDPBUSDSMasked512load, ssa.OpAMD64VFMADD213PSMasked128load, ssa.OpAMD64VFMADD213PSMasked256load, ssa.OpAMD64VFMADD213PSMasked512load, @@ -3045,18 +3025,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128, ssa.OpAMD64VPMADDUBSWMasked256, ssa.OpAMD64VPMADDUBSWMasked512, - ssa.OpAMD64VPDPBUSDMasked128, - ssa.OpAMD64VPDPBUSDMasked128load, - ssa.OpAMD64VPDPBUSDMasked256, - ssa.OpAMD64VPDPBUSDMasked256load, - ssa.OpAMD64VPDPBUSDMasked512, - ssa.OpAMD64VPDPBUSDMasked512load, - ssa.OpAMD64VPDPBUSDSMasked128, - ssa.OpAMD64VPDPBUSDSMasked128load, - ssa.OpAMD64VPDPBUSDSMasked256, - ssa.OpAMD64VPDPBUSDSMasked256load, - ssa.OpAMD64VPDPBUSDSMasked512, - ssa.OpAMD64VPDPBUSDSMasked512load, ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 353d272179..38ca44f7eb 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -1817,11 +1817,3 @@ (EQ (VPTEST x:(VPAND(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (EQ (VPTEST j k) yes no) (EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand 
order (EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order - -// DotProductQuadruple optimizations -(VPADDD128 (VPDPBUSD128 (Zero128 ) x y) z) => (VPDPBUSD128 z x y) -(VPADDD256 (VPDPBUSD256 (Zero256 ) x y) z) => (VPDPBUSD256 z x y) -(VPADDD512 (VPDPBUSD512 (Zero512 ) x y) z) => (VPDPBUSD512 z x y) -(VPADDD128 (VPDPBUSDS128 (Zero128 ) x y) z) => (VPDPBUSDS128 z x y) -(VPADDD256 (VPDPBUSDS256 (Zero256 ) x y) z) => (VPDPBUSDS256 z x y) -(VPADDD512 (VPDPBUSDS512 (Zero512 ) x y) z) => (VPDPBUSDS512 z x y) \ No newline at end of file diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 6b1cac322c..793cf5c97f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -316,12 +316,6 @@ (DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...) (DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...) (DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...) -(DotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...) -(DotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...) -(DotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...) -(DotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...) -(DotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...) -(DotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...) 
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) @@ -1547,12 +1541,6 @@ (VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) => (VPMADDUBSWMasked128 x y mask) (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) => (VPMADDUBSWMasked256 x y mask) (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512 x y mask) -(VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) => (VPDPBUSDMasked128 x y z mask) -(VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) => (VPDPBUSDMasked256 x y z mask) -(VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask) -(VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask) -(VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask) -(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask) (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask) (VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask) (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) @@ -2671,14 +2659,6 @@ (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem) (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem) (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem) -(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem) -(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem) -(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) 
&& clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem) -(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem) -(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem) -(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem) -(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem) -(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem) (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem) (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem) (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index cd0cedc831..c81e6b7869 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -452,18 +452,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", 
commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPDPBUSD128", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSD256", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSD512", argLength: 3, reg: w31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPWSSD128", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPWSSD256", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPWSSD512", argLength: 3, reg: w31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, @@ -1714,14 +1702,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, 
v3kv, vgpv, vgp, vfpv, vf {name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, - {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, {name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", 
resultInArg0: true}, {name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true}, {name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 07878e2c69..8afa2bf259 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -304,12 +304,6 @@ func simdGenericOps() []opData { {name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false}, {name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false}, {name: "DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false}, - {name: "DotProductQuadrupleInt32x4", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleInt32x8", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleInt32x16", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false}, {name: "EqualFloat32x4", argLength: 2, commutative: true}, {name: "EqualFloat32x8", argLength: 2, commutative: true}, {name: "EqualFloat32x16", argLength: 2, commutative: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ab7ca8de0d..9ba5767596 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1693,18 +1693,6 @@ const ( OpAMD64VPCOMPRESSWMasked128 OpAMD64VPCOMPRESSWMasked256 OpAMD64VPCOMPRESSWMasked512 - OpAMD64VPDPBUSD128 - OpAMD64VPDPBUSD256 - OpAMD64VPDPBUSD512 - OpAMD64VPDPBUSDMasked128 - 
OpAMD64VPDPBUSDMasked256 - OpAMD64VPDPBUSDMasked512 - OpAMD64VPDPBUSDS128 - OpAMD64VPDPBUSDS256 - OpAMD64VPDPBUSDS512 - OpAMD64VPDPBUSDSMasked128 - OpAMD64VPDPBUSDSMasked256 - OpAMD64VPDPBUSDSMasked512 OpAMD64VPDPWSSD128 OpAMD64VPDPWSSD256 OpAMD64VPDPWSSD512 @@ -2955,14 +2943,6 @@ const ( OpAMD64VPCMPEQQ512load OpAMD64VPCMPGTD512load OpAMD64VPCMPGTQ512load - OpAMD64VPDPBUSD512load - OpAMD64VPDPBUSDMasked128load - OpAMD64VPDPBUSDMasked256load - OpAMD64VPDPBUSDMasked512load - OpAMD64VPDPBUSDS512load - OpAMD64VPDPBUSDSMasked128load - OpAMD64VPDPBUSDSMasked256load - OpAMD64VPDPBUSDSMasked512load OpAMD64VPDPWSSD512load OpAMD64VPDPWSSDMasked128load OpAMD64VPDPWSSDMasked256load @@ -6478,12 +6458,6 @@ const ( OpDotProductPairsSaturatedUint8x16 OpDotProductPairsSaturatedUint8x32 OpDotProductPairsSaturatedUint8x64 - OpDotProductQuadrupleInt32x4 - OpDotProductQuadrupleInt32x8 - OpDotProductQuadrupleInt32x16 - OpDotProductQuadrupleSaturatedInt32x4 - OpDotProductQuadrupleSaturatedInt32x8 - OpDotProductQuadrupleSaturatedInt32x16 OpEqualFloat32x4 OpEqualFloat32x8 OpEqualFloat32x16 @@ -27293,204 +27267,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPDPBUSD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSD256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSD512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSDS256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSDS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 
X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPDPWSSD128", argLen: 3, @@ -46191,156 +45967,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPDPBUSD512load", - auxType: auxSymOff, - argLen: 4, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked128load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked256load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked512load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDS512load", - auxType: auxSymOff, - argLen: 4, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked128load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: 
"VPDPBUSDSMasked256load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked512load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPDPWSSD512load", auxType: auxSymOff, @@ -90939,36 +90565,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "DotProductQuadrupleInt32x4", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleInt32x8", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleInt32x16", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleSaturatedInt32x4", - argLen: 3, - generic: true, 
- }, - { - name: "DotProductQuadrupleSaturatedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleSaturatedInt32x16", - argLen: 3, - generic: true, - }, { name: "EqualFloat32x4", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index a0f4f6a704..35e9516f61 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1006,10 +1006,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v) case OpAMD64VPACKUSDWMasked512: return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v) - case OpAMD64VPADDD128: - return rewriteValueAMD64_OpAMD64VPADDD128(v) - case OpAMD64VPADDD256: - return rewriteValueAMD64_OpAMD64VPADDD256(v) case OpAMD64VPADDD512: return rewriteValueAMD64_OpAMD64VPADDD512(v) case OpAMD64VPADDDMasked128: @@ -1126,22 +1122,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v) case OpAMD64VPCMPUQMasked512: return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v) - case OpAMD64VPDPBUSD512: - return rewriteValueAMD64_OpAMD64VPDPBUSD512(v) - case OpAMD64VPDPBUSDMasked128: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v) - case OpAMD64VPDPBUSDMasked256: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v) - case OpAMD64VPDPBUSDMasked512: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v) - case OpAMD64VPDPBUSDS512: - return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v) - case OpAMD64VPDPBUSDSMasked128: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v) - case OpAMD64VPDPBUSDSMasked256: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v) - case OpAMD64VPDPBUSDSMasked512: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v) case OpAMD64VPDPWSSD512: return rewriteValueAMD64_OpAMD64VPDPWSSD512(v) case OpAMD64VPDPWSSDMasked128: @@ -3142,24 +3122,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDotProductPairsSaturatedUint8x64: v.Op 
= OpAMD64VPMADDUBSW512 return true - case OpDotProductQuadrupleInt32x16: - v.Op = OpAMD64VPDPBUSD512 - return true - case OpDotProductQuadrupleInt32x4: - v.Op = OpAMD64VPDPBUSD128 - return true - case OpDotProductQuadrupleInt32x8: - v.Op = OpAMD64VPDPBUSD256 - return true - case OpDotProductQuadrupleSaturatedInt32x16: - v.Op = OpAMD64VPDPBUSDS512 - return true - case OpDotProductQuadrupleSaturatedInt32x4: - v.Op = OpAMD64VPDPBUSDS128 - return true - case OpDotProductQuadrupleSaturatedInt32x8: - v.Op = OpAMD64VPDPBUSDS256 - return true case OpEq16: return rewriteValueAMD64_OpEq16(v) case OpEq32: @@ -35318,34 +35280,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) - // result: (VPDPBUSDMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD128 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) - // result: (VPDPBUSDSMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSDS128 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked128) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) // result: (VPMOVSXDQMasked128 x mask) for { @@ -36165,34 +36099,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) - // result: (VPDPBUSDMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD256 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) - // result: (VPDPBUSDSMasked256 x y z mask) - for { 
- if v_0.Op != OpAMD64VPDPBUSDS256 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked256) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) // result: (VPMOVSXDQMasked256 x mask) for { @@ -37100,34 +37006,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) - // result: (VPDPBUSDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD512 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) - // result: (VPDPBUSDSMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSDS512 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked512) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) // result: (VPMOVSXDQMasked512 x mask) for { @@ -42503,151 +42381,9 @@ func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDD128 (VPDPBUSD128 (Zero128 ) x y) z) - // result: (VPDPBUSD128 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSD128 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero128 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD128) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD128 (VPDPBUSDS128 (Zero128 ) x y) z) - // result: (VPDPBUSDS128 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS128 { - continue 
- } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero128 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS128) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDD256 (VPDPBUSD256 (Zero256 ) x y) z) - // result: (VPDPBUSD256 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSD256 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero256 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD256) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD256 (VPDPBUSDS256 (Zero256 ) x y) z) - // result: (VPDPBUSDS256 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS256 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero256 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS256) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - return false -} func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPADDD512 (VPDPBUSD512 (Zero512 ) x y) z) - // result: (VPDPBUSD512 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSD512 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero512 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD512) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD512 (VPDPBUSDS512 (Zero512 ) x y) z) - // result: (VPDPBUSDS512 z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS512 { - continue - } - y 
:= v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero512 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS512) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) // result: (VPADDD512load {sym} [off] x ptr mem) @@ -53370,250 +53106,6 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSD512load {sym} [off] x y ptr mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool { - v_3 := 
v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDS512load {sym} [off] x y ptr mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDS512load) - 
v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - 
l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index e1d7ac796d..6769122aa4 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -328,12 +328,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.DotProductQuadrupleSaturated", 
opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml index 2b1da7adaf..54a8ece574 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml @@ -10,21 +10,10 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. # - go: DotProductBroadcast # commutative: true # # documentation: !string |- # // NAME multiplies all elements and broadcasts the sum. -- go: DotProductQuadruple - commutative: false - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y. - // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction. -- go: DotProductQuadrupleSaturated - commutative: false - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y. - // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction. 
- go: AddDotProductPairs commutative: false noTypes: "true" diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml index 4a1195b52d..18ce8a53b2 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml @@ -33,33 +33,6 @@ # const: 127 # out: # - *dpb_src -- go: DotProductQuadruple - asm: "VPDPBUSD" - operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: DotProductQuadrupleSaturated - asm: "VPDPBUSDS" - operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: AddDotProductPairs asm: "VPDPWSSD" in: diff --git a/src/simd/archsimd/internal/simd_test/simd_test.go b/src/simd/archsimd/internal/simd_test/simd_test.go index 5fd7407dbe..21c4d87dec 100644 --- a/src/simd/archsimd/internal/simd_test/simd_test.go +++ b/src/simd/archsimd/internal/simd_test/simd_test.go @@ -1123,44 +1123,6 @@ func TestMaskedMerge(t *testing.T) { } } -func TestDotProductQuadruple(t *testing.T) { - if !archsimd.X86.AVXVNNI() { - t.Skip("Test requires X86.AVXVNNI, not available on this hardware") - return - } - xd := make([]int8, 16) - yd := make([]uint8, 16) - zd := make([]int32, 4) - wanted1 := make([]int32, 4) - wanted2 := make([]int32, 4) - res1 := make([]int32, 4) - res2 := make([]int32, 4) - for i := range 16 { - xd[i] = int8(i + 112) // 112+15 = 127 - yd[i] = uint8(i + 240) // 240+15 = 255 - } - for i := range 4 { - i4 := 4 * i - wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3]) - zd[i] = int32(i + 1) - wanted2[i] = wanted1[i] + zd[i] - } - - x := 
archsimd.LoadInt8x16Slice(xd) - y := archsimd.LoadUint8x16Slice(yd) - z := archsimd.LoadInt32x4Slice(zd) - x.DotProductQuadruple(y).StoreSlice(res1) - x.DotProductQuadruple(y).Add(z).StoreSlice(res2) - for i := range 4 { - if res1[i] != wanted1[i] { - t.Errorf("got %d wanted %d", res1[i], wanted1[i]) - } - if res2[i] != wanted2[i] { - t.Errorf("got %d wanted %d", res2[i], wanted2[i]) - } - } -} - func TestPermuteScalars(t *testing.T) { x := []int32{11, 12, 13, 14} want := []int32{12, 13, 14, 11} diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index 2a8a6bd4c6..6f904f1cbc 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -1980,46 +1980,6 @@ func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16 // Asm: VPMADDUBSW, CPU Feature: AVX512 func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32 -/* DotProductQuadruple */ - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. -// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4 - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. -// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8 - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. -// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16 - -/* DotProductQuadrupleSaturated */ - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. 
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4 - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. -// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8 - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. -// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16 - /* Equal */ // Equal returns a mask whose elements indicate whether x == y. -- 2.52.0