ssa.OpAMD64VPERMI2Q256,
ssa.OpAMD64VPERMI2PD512,
ssa.OpAMD64VPERMI2Q512,
- ssa.OpAMD64VPDPBUSD128,
- ssa.OpAMD64VPDPBUSD256,
- ssa.OpAMD64VPDPBUSD512,
- ssa.OpAMD64VPDPBUSDS128,
- ssa.OpAMD64VPDPBUSDS256,
- ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VFMADD213PS128,
ssa.OpAMD64VFMADD213PS256,
ssa.OpAMD64VFMADD213PS512,
ssa.OpAMD64VPMADDUBSWMasked128Merging,
ssa.OpAMD64VPMADDUBSWMasked256Merging,
ssa.OpAMD64VPMADDUBSWMasked512Merging,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked512,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VGF2P8MULBMasked128Merging,
ssa.OpAMD64VGF2P8MULBMasked256Merging,
ssa.OpAMD64VGF2P8MULBMasked512Merging,
ssa.OpAMD64VPERMI2Q256load,
ssa.OpAMD64VPERMI2PD512load,
ssa.OpAMD64VPERMI2Q512load,
- ssa.OpAMD64VPDPBUSD512load,
- ssa.OpAMD64VPDPBUSDS512load,
ssa.OpAMD64VFMADD213PS128load,
ssa.OpAMD64VFMADD213PS256load,
ssa.OpAMD64VFMADD213PS512load,
ssa.OpAMD64VPERMI2QMasked256load,
ssa.OpAMD64VPERMI2PDMasked512load,
ssa.OpAMD64VPERMI2QMasked512load,
- ssa.OpAMD64VPDPBUSDMasked128load,
- ssa.OpAMD64VPDPBUSDMasked256load,
- ssa.OpAMD64VPDPBUSDMasked512load,
- ssa.OpAMD64VPDPBUSDSMasked128load,
- ssa.OpAMD64VPDPBUSDSMasked256load,
- ssa.OpAMD64VPDPBUSDSMasked512load,
ssa.OpAMD64VFMADD213PSMasked128load,
ssa.OpAMD64VFMADD213PSMasked256load,
ssa.OpAMD64VFMADD213PSMasked512load,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked128load,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked256load,
- ssa.OpAMD64VPDPBUSDMasked512,
- ssa.OpAMD64VPDPBUSDMasked512load,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked128load,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked256load,
- ssa.OpAMD64VPDPBUSDSMasked512,
- ssa.OpAMD64VPDPBUSDSMasked512load,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
(EQ (VPTEST x:(VPAND(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (EQ (VPTEST j k) yes no)
(EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
(EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
-
-// DotProductQuadruple optimizations
-(VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z) => (VPDPBUSD128 <t> z x y)
-(VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z) => (VPDPBUSD256 <t> z x y)
-(VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z) => (VPDPBUSD512 <t> z x y)
-(VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z) => (VPDPBUSDS128 <t> z x y)
-(VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z) => (VPDPBUSDS256 <t> z x y)
-(VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z) => (VPDPBUSDS512 <t> z x y)
\ No newline at end of file
(DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
(DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
(DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
-(DotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
-(DotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
-(DotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(DotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
-(DotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
-(DotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
(VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) => (VPMADDUBSWMasked128 x y mask)
(VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) => (VPMADDUBSWMasked256 x y mask)
(VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512 x y mask)
-(VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) => (VPDPBUSDMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) => (VPDPBUSDMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask)
-(VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask)
(VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask)
(VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask)
(VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask)
(VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
(VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
(VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
-(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
-(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
-(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem)
-(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
(VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
(VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
{name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VPDPBUSD128", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSD256", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSD512", argLength: 3, reg: w31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPDPWSSD128", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPDPWSSD256", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPDPWSSD512", argLength: 3, reg: w31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
- {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false},
{name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false},
{name: "DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false},
- {name: "DotProductQuadrupleInt32x4", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleInt32x8", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleInt32x16", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
{name: "EqualFloat32x4", argLength: 2, commutative: true},
{name: "EqualFloat32x8", argLength: 2, commutative: true},
{name: "EqualFloat32x16", argLength: 2, commutative: true},
OpAMD64VPCOMPRESSWMasked128
OpAMD64VPCOMPRESSWMasked256
OpAMD64VPCOMPRESSWMasked512
- OpAMD64VPDPBUSD128
- OpAMD64VPDPBUSD256
- OpAMD64VPDPBUSD512
- OpAMD64VPDPBUSDMasked128
- OpAMD64VPDPBUSDMasked256
- OpAMD64VPDPBUSDMasked512
- OpAMD64VPDPBUSDS128
- OpAMD64VPDPBUSDS256
- OpAMD64VPDPBUSDS512
- OpAMD64VPDPBUSDSMasked128
- OpAMD64VPDPBUSDSMasked256
- OpAMD64VPDPBUSDSMasked512
OpAMD64VPDPWSSD128
OpAMD64VPDPWSSD256
OpAMD64VPDPWSSD512
OpAMD64VPCMPEQQ512load
OpAMD64VPCMPGTD512load
OpAMD64VPCMPGTQ512load
- OpAMD64VPDPBUSD512load
- OpAMD64VPDPBUSDMasked128load
- OpAMD64VPDPBUSDMasked256load
- OpAMD64VPDPBUSDMasked512load
- OpAMD64VPDPBUSDS512load
- OpAMD64VPDPBUSDSMasked128load
- OpAMD64VPDPBUSDSMasked256load
- OpAMD64VPDPBUSDSMasked512load
OpAMD64VPDPWSSD512load
OpAMD64VPDPWSSDMasked128load
OpAMD64VPDPWSSDMasked256load
OpDotProductPairsSaturatedUint8x16
OpDotProductPairsSaturatedUint8x32
OpDotProductPairsSaturatedUint8x64
- OpDotProductQuadrupleInt32x4
- OpDotProductQuadrupleInt32x8
- OpDotProductQuadrupleInt32x16
- OpDotProductQuadrupleSaturatedInt32x4
- OpDotProductQuadrupleSaturatedInt32x8
- OpDotProductQuadrupleSaturatedInt32x16
OpEqualFloat32x4
OpEqualFloat32x8
OpEqualFloat32x16
},
},
},
- {
- name: "VPDPBUSD128",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSD256",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSD512",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked128",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked256",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked512",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDS128",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSDS256",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSDS512",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked128",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked256",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked512",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
{
name: "VPDPWSSD128",
argLen: 3,
},
},
},
- {
- name: "VPDPBUSD512load",
- auxType: auxSymOff,
- argLen: 4,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked128load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked256load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked512load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDS512load",
- auxType: auxSymOff,
- argLen: 4,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked128load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked256load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked512load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
{
name: "VPDPWSSD512load",
auxType: auxSymOff,
argLen: 2,
generic: true,
},
- {
- name: "DotProductQuadrupleInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x16",
- argLen: 3,
- generic: true,
- },
{
name: "EqualFloat32x4",
argLen: 2,
return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v)
case OpAMD64VPACKUSDWMasked512:
return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v)
- case OpAMD64VPADDD128:
- return rewriteValueAMD64_OpAMD64VPADDD128(v)
- case OpAMD64VPADDD256:
- return rewriteValueAMD64_OpAMD64VPADDD256(v)
case OpAMD64VPADDD512:
return rewriteValueAMD64_OpAMD64VPADDD512(v)
case OpAMD64VPADDDMasked128:
return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v)
case OpAMD64VPCMPUQMasked512:
return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v)
- case OpAMD64VPDPBUSD512:
- return rewriteValueAMD64_OpAMD64VPDPBUSD512(v)
- case OpAMD64VPDPBUSDMasked128:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v)
- case OpAMD64VPDPBUSDMasked256:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v)
- case OpAMD64VPDPBUSDMasked512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v)
- case OpAMD64VPDPBUSDS512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v)
- case OpAMD64VPDPBUSDSMasked128:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v)
- case OpAMD64VPDPBUSDSMasked256:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v)
- case OpAMD64VPDPBUSDSMasked512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v)
case OpAMD64VPDPWSSD512:
return rewriteValueAMD64_OpAMD64VPDPWSSD512(v)
case OpAMD64VPDPWSSDMasked128:
case OpDotProductPairsSaturatedUint8x64:
v.Op = OpAMD64VPMADDUBSW512
return true
- case OpDotProductQuadrupleInt32x16:
- v.Op = OpAMD64VPDPBUSD512
- return true
- case OpDotProductQuadrupleInt32x4:
- v.Op = OpAMD64VPDPBUSD128
- return true
- case OpDotProductQuadrupleInt32x8:
- v.Op = OpAMD64VPDPBUSD256
- return true
- case OpDotProductQuadrupleSaturatedInt32x16:
- v.Op = OpAMD64VPDPBUSDS512
- return true
- case OpDotProductQuadrupleSaturatedInt32x4:
- v.Op = OpAMD64VPDPBUSDS128
- return true
- case OpDotProductQuadrupleSaturatedInt32x8:
- v.Op = OpAMD64VPDPBUSDS256
- return true
case OpEq16:
return rewriteValueAMD64_OpEq16(v)
case OpEq32:
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask)
- // result: (VPDPBUSDMasked128 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD128 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked128)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask)
- // result: (VPDPBUSDSMasked128 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS128 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked128)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask)
// result: (VPMOVSXDQMasked128 x mask)
for {
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask)
- // result: (VPDPBUSDMasked256 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD256 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked256)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask)
- // result: (VPDPBUSDSMasked256 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS256 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked256)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask)
// result: (VPMOVSXDQMasked256 x mask)
for {
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask)
- // result: (VPDPBUSDMasked512 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD512 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked512)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask)
- // result: (VPDPBUSDSMasked512 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS512 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked512)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask)
// result: (VPMOVSXDQMasked512 x mask)
for {
}
return false
}
-func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z)
- // result: (VPDPBUSD128 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD128 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero128 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD128)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z)
- // result: (VPDPBUSDS128 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS128 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero128 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS128)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z)
- // result: (VPDPBUSD256 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD256 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero256 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD256)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z)
- // result: (VPDPBUSDS256 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS256 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero256 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS256)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z)
- // result: (VPDPBUSD512 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD512 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero512 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD512)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z)
- // result: (VPDPBUSDS512 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS512 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero512 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS512)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
// match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
// result: (VPADDD512load {sym} [off] x ptr mem)
}
return false
}
-func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSD512load {sym} [off] x y ptr mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSD512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg4(x, y, ptr, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload128 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked128load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload256 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked256load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDS512load {sym} [off] x y ptr mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDS512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg4(x, y, ptr, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload128 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked128load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload256 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked256load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
documentation: !string |-
// NAME multiplies the elements and add the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
-# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
# - go: DotProductBroadcast
# commutative: true
# # documentation: !string |-
# // NAME multiplies all elements and broadcasts the sum.
-- go: DotProductQuadruple
- commutative: false
- documentation: !string |-
- // NAME performs dot products on groups of 4 elements of x and y.
- // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-- go: DotProductQuadrupleSaturated
- commutative: false
- documentation: !string |-
- // NAME multiplies performs dot products on groups of 4 elements of x and y.
- // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
- go: AddDotProductPairs
commutative: false
noTypes: "true"
# const: 127
# out:
# - *dpb_src
-- go: DotProductQuadruple
- asm: "VPDPBUSD"
- operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
- in:
- - &qdpa_acc
- go: $t_acc
- base: int
- elemBits: 32
- - &qdpa_src1
- go: $t_src1
- base: uint
- overwriteElementBits: 8
- - &qdpa_src2
- go: $t_src2
- base: int
- overwriteElementBits: 8
- out:
- - *qdpa_acc
-- go: DotProductQuadrupleSaturated
- asm: "VPDPBUSDS"
- operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
- in:
- - *qdpa_acc
- - *qdpa_src1
- - *qdpa_src2
- out:
- - *qdpa_acc
- go: AddDotProductPairs
asm: "VPDPWSSD"
in:
}
}
-func TestDotProductQuadruple(t *testing.T) {
- if !archsimd.X86.AVXVNNI() {
- t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
- return
- }
- xd := make([]int8, 16)
- yd := make([]uint8, 16)
- zd := make([]int32, 4)
- wanted1 := make([]int32, 4)
- wanted2 := make([]int32, 4)
- res1 := make([]int32, 4)
- res2 := make([]int32, 4)
- for i := range 16 {
- xd[i] = int8(i + 112) // 112+15 = 127
- yd[i] = uint8(i + 240) // 240+15 = 255
- }
- for i := range 4 {
- i4 := 4 * i
- wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3])
- zd[i] = int32(i + 1)
- wanted2[i] = wanted1[i] + zd[i]
- }
-
- x := archsimd.LoadInt8x16Slice(xd)
- y := archsimd.LoadUint8x16Slice(yd)
- z := archsimd.LoadInt32x4Slice(zd)
- x.DotProductQuadruple(y).StoreSlice(res1)
- x.DotProductQuadruple(y).Add(z).StoreSlice(res2)
- for i := range 4 {
- if res1[i] != wanted1[i] {
- t.Errorf("got %d wanted %d", res1[i], wanted1[i])
- }
- if res2[i] != wanted2[i] {
- t.Errorf("got %d wanted %d", res2[i], wanted2[i])
- }
- }
-}
-
func TestPermuteScalars(t *testing.T) {
x := []int32{11, 12, 13, 14}
want := []int32{12, 13, 14, 11}
// Asm: VPMADDUBSW, CPU Feature: AVX512
func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
-/* DotProductQuadruple */
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16
-
-/* DotProductQuadrupleSaturated */
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
-
/* Equal */
// Equal returns a mask whose elements indicate whether x == y.