ssa.OpAMD64VPMOVZXBQ256,
ssa.OpAMD64VPMOVZXWQ256,
ssa.OpAMD64VPMOVZXBQ512,
+ ssa.OpAMD64VPLZCNTD128,
+ ssa.OpAMD64VPLZCNTD256,
+ ssa.OpAMD64VPLZCNTD512,
+ ssa.OpAMD64VPLZCNTQ128,
+ ssa.OpAMD64VPLZCNTQ256,
+ ssa.OpAMD64VPLZCNTQ512,
ssa.OpAMD64VPOPCNTB128,
ssa.OpAMD64VPOPCNTB256,
ssa.OpAMD64VPOPCNTB512,
ssa.OpAMD64VPEXPANDQMasked128,
ssa.OpAMD64VPEXPANDQMasked256,
ssa.OpAMD64VPEXPANDQMasked512,
+ ssa.OpAMD64VPLZCNTDMasked128,
+ ssa.OpAMD64VPLZCNTDMasked256,
+ ssa.OpAMD64VPLZCNTDMasked512,
+ ssa.OpAMD64VPLZCNTQMasked128,
+ ssa.OpAMD64VPLZCNTQMasked256,
+ ssa.OpAMD64VPLZCNTQMasked512,
ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512,
ssa.OpAMD64VGF2P8MULBMasked128,
ssa.OpAMD64VGF2P8MULBMasked256,
ssa.OpAMD64VGF2P8MULBMasked512,
+ ssa.OpAMD64VPLZCNTDMasked128,
+ ssa.OpAMD64VPLZCNTDMasked256,
+ ssa.OpAMD64VPLZCNTDMasked512,
+ ssa.OpAMD64VPLZCNTQMasked128,
+ ssa.OpAMD64VPLZCNTQMasked256,
+ ssa.OpAMD64VPLZCNTQMasked512,
ssa.OpAMD64VMAXPSMasked128,
ssa.OpAMD64VMAXPSMasked256,
ssa.OpAMD64VMAXPSMasked512,
(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+(LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
+(LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
+(LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
+(LeadingZerosInt64x2 ...) => (VPLZCNTQ128 ...)
+(LeadingZerosInt64x4 ...) => (VPLZCNTQ256 ...)
+(LeadingZerosInt64x8 ...) => (VPLZCNTQ512 ...)
+(LeadingZerosUint32x4 ...) => (VPLZCNTD128 ...)
+(LeadingZerosUint32x8 ...) => (VPLZCNTD256 ...)
+(LeadingZerosUint32x16 ...) => (VPLZCNTD512 ...)
+(LeadingZerosUint64x2 ...) => (VPLZCNTQ128 ...)
+(LeadingZerosUint64x4 ...) => (VPLZCNTQ256 ...)
+(LeadingZerosUint64x8 ...) => (VPLZCNTQ512 ...)
(LessFloat32x4 x y) => (VCMPPS128 [1] x y)
(LessFloat32x8 x y) => (VCMPPS256 [1] x y)
(LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
(VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y mask)
(VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) => (VGF2P8AFFINEQBMasked512 [a] x y mask)
(VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512 x y mask)
+(VMOVDQU32Masked512 (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512 x mask)
+(VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512 x mask)
(VMOVDQU32Masked512 (VMAXPS512 x y) mask) => (VMAXPSMasked512 x y mask)
(VMOVDQU64Masked512 (VMAXPD512 x y) mask) => (VMAXPDMasked512 x y mask)
(VMOVDQU8Masked512 (VPMAXSB512 x y) mask) => (VPMAXSBMasked512 x y mask)
{name: "VPHSUBSW256", argLength: 2, reg: v21, asm: "VPHSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHSUBW128", argLength: 2, reg: v21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHSUBW256", argLength: 2, reg: v21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPLZCNTD128", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPLZCNTD256", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPLZCNTD512", argLength: 1, reg: w11, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPLZCNTDMasked128", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPLZCNTDMasked256", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPLZCNTDMasked512", argLength: 2, reg: wkw, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPLZCNTQ128", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPLZCNTQ256", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPLZCNTQ512", argLength: 1, reg: w11, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPLZCNTQMasked128", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPLZCNTQMasked256", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPLZCNTQMasked512", argLength: 2, reg: wkw, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSW128", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSW256", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSW512", argLength: 2, reg: w21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "IsNanFloat64x2", argLength: 2, commutative: true},
{name: "IsNanFloat64x4", argLength: 2, commutative: true},
{name: "IsNanFloat64x8", argLength: 2, commutative: true},
+ {name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
+ {name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
+ {name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
+ {name: "LeadingZerosInt64x2", argLength: 1, commutative: false},
+ {name: "LeadingZerosInt64x4", argLength: 1, commutative: false},
+ {name: "LeadingZerosInt64x8", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint32x4", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint32x8", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint32x16", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint64x2", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint64x4", argLength: 1, commutative: false},
+ {name: "LeadingZerosUint64x8", argLength: 1, commutative: false},
{name: "LessEqualFloat32x4", argLength: 2, commutative: false},
{name: "LessEqualFloat32x8", argLength: 2, commutative: false},
{name: "LessEqualFloat32x16", argLength: 2, commutative: false},
OpAMD64VPHSUBSW256
OpAMD64VPHSUBW128
OpAMD64VPHSUBW256
+ OpAMD64VPLZCNTD128
+ OpAMD64VPLZCNTD256
+ OpAMD64VPLZCNTD512
+ OpAMD64VPLZCNTDMasked128
+ OpAMD64VPLZCNTDMasked256
+ OpAMD64VPLZCNTDMasked512
+ OpAMD64VPLZCNTQ128
+ OpAMD64VPLZCNTQ256
+ OpAMD64VPLZCNTQ512
+ OpAMD64VPLZCNTQMasked128
+ OpAMD64VPLZCNTQMasked256
+ OpAMD64VPLZCNTQMasked512
OpAMD64VPMADDUBSW128
OpAMD64VPMADDUBSW256
OpAMD64VPMADDUBSW512
OpIsNanFloat64x2
OpIsNanFloat64x4
OpIsNanFloat64x8
+ OpLeadingZerosInt32x4
+ OpLeadingZerosInt32x8
+ OpLeadingZerosInt32x16
+ OpLeadingZerosInt64x2
+ OpLeadingZerosInt64x4
+ OpLeadingZerosInt64x8
+ OpLeadingZerosUint32x4
+ OpLeadingZerosUint32x8
+ OpLeadingZerosUint32x16
+ OpLeadingZerosUint64x2
+ OpLeadingZerosUint64x4
+ OpLeadingZerosUint64x8
OpLessEqualFloat32x4
OpLessEqualFloat32x8
OpLessEqualFloat32x16
},
},
},
+ {
+ name: "VPLZCNTD128",
+ argLen: 1,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTD256",
+ argLen: 1,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTD512",
+ argLen: 1,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTDMasked128",
+ argLen: 2,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTDMasked256",
+ argLen: 2,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTDMasked512",
+ argLen: 2,
+ asm: x86.AVPLZCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQ128",
+ argLen: 1,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQ256",
+ argLen: 1,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQ512",
+ argLen: 1,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQMasked128",
+ argLen: 2,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQMasked256",
+ argLen: 2,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPLZCNTQMasked512",
+ argLen: 2,
+ asm: x86.AVPLZCNTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
{
		name:        "IsNanFloat64x8",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "LeadingZerosInt32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosInt32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosInt32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosInt64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosInt64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosInt64x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "LeadingZerosUint64x8",
+ argLen: 1,
+ generic: true,
+ },
{
name: "LessEqualFloat32x4",
argLen: 2,
return rewriteValueAMD64_OpIsNonNil(v)
case OpIsSliceInBounds:
return rewriteValueAMD64_OpIsSliceInBounds(v)
+ case OpLeadingZerosInt32x16:
+ v.Op = OpAMD64VPLZCNTD512
+ return true
+ case OpLeadingZerosInt32x4:
+ v.Op = OpAMD64VPLZCNTD128
+ return true
+ case OpLeadingZerosInt32x8:
+ v.Op = OpAMD64VPLZCNTD256
+ return true
+ case OpLeadingZerosInt64x2:
+ v.Op = OpAMD64VPLZCNTQ128
+ return true
+ case OpLeadingZerosInt64x4:
+ v.Op = OpAMD64VPLZCNTQ256
+ return true
+ case OpLeadingZerosInt64x8:
+ v.Op = OpAMD64VPLZCNTQ512
+ return true
+ case OpLeadingZerosUint32x16:
+ v.Op = OpAMD64VPLZCNTD512
+ return true
+ case OpLeadingZerosUint32x4:
+ v.Op = OpAMD64VPLZCNTD128
+ return true
+ case OpLeadingZerosUint32x8:
+ v.Op = OpAMD64VPLZCNTD256
+ return true
+ case OpLeadingZerosUint64x2:
+ v.Op = OpAMD64VPLZCNTQ128
+ return true
+ case OpLeadingZerosUint64x4:
+ v.Op = OpAMD64VPLZCNTQ256
+ return true
+ case OpLeadingZerosUint64x8:
+ v.Op = OpAMD64VPLZCNTQ512
+ return true
case OpLeq16:
return rewriteValueAMD64_OpLeq16(v)
case OpLeq16U:
v.AddArg3(x, y, mask)
return true
}
+ // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask)
+ // result: (VPLZCNTDMasked512 x mask)
+ for {
+ if v_0.Op != OpAMD64VPLZCNTD512 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPLZCNTDMasked512)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask)
// result: (VMAXPSMasked512 x y mask)
for {
v.AddArg3(x, y, mask)
return true
}
+ // match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask)
+ // result: (VPLZCNTQMasked512 x mask)
+ for {
+ if v_0.Op != OpAMD64VPLZCNTQ512 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPLZCNTQMasked512)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask)
// result: (VMAXPDMasked512 x y mask)
for {
addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.LeadingZeros", opLen1(ssa.OpLeadingZerosUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
--- /dev/null
+!sum
+- go: LeadingZeros
+ commutative: false
+ documentation: !string |-
+ // NAME counts the leading zeros of each element in x.
--- /dev/null
+!sum
+- go: LeadingZeros
+ asm: "VPLZCNT[DQ]"
+ in:
+ - &any
+ go: $t
+ out:
+ - *any
checkSlices[int64](t, r, []int64{11, 22, 33, 44})
checkSlices[int64](t, s, []int64{9, 18, 27, 36})
}
+
+func TestLeadingZeros(t *testing.T) {
+ if !simd.HasAVX512() {
+ t.Skip("Test requires HasAVX512, not available on this hardware")
+ return
+ }
+
+ src := []uint64{0b1111, 0}
+ want := []uint64{60, 64}
+ got := make([]uint64, 2)
+ simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
+ for i := range 2 {
+ if want[i] != got[i] {
+ t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
+ }
+ }
+}
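+
+// TestLeadingZerosInt32x4 is a minimal companion sketch covering a 32-bit lane
+// case; it assumes LoadInt32x4Slice and Int32x4.StoreSlice mirror the Uint64x2
+// slice helpers used in TestLeadingZeros above.
+func TestLeadingZerosInt32x4(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+	}
+
+	src := []int32{1, 0b1000, -1, 0}
+	want := []int32{31, 28, 0, 32}
+	got := make([]int32, 4)
+	simd.LoadInt32x4Slice(src).LeadingZeros().StoreSlice(got)
+	for i := range 4 {
+		if want[i] != got[i] {
+			t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
+		}
+	}
+}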
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) IsNan(y Float64x8) Mask64x8
+/* LeadingZeros */
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Int32x4) LeadingZeros() Int32x4
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Int32x8) LeadingZeros() Int32x8
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Int32x16) LeadingZeros() Int32x16
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Int64x2) LeadingZeros() Int64x2
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Int64x4) LeadingZeros() Int64x4
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Int64x8) LeadingZeros() Int64x8
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Uint32x4) LeadingZeros() Uint32x4
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Uint32x8) LeadingZeros() Uint32x8
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTD, CPU Feature: AVX512
+func (x Uint32x16) LeadingZeros() Uint32x16
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Uint64x2) LeadingZeros() Uint64x2
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Uint64x4) LeadingZeros() Uint64x4
+
+// LeadingZeros counts the leading zeros of each element in x.
+//
+// Asm: VPLZCNTQ, CPU Feature: AVX512
+func (x Uint64x8) LeadingZeros() Uint64x8
+
/* Less */
// Less compares for less than.