ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
- ssa.OpAMD64VBROADCASTSS128,
ssa.OpAMD64VPBROADCASTQ128,
- ssa.OpAMD64VPBROADCASTB128,
- ssa.OpAMD64VPBROADCASTW128,
+ ssa.OpAMD64VBROADCASTSS128,
+ ssa.OpAMD64VBROADCASTSD256,
ssa.OpAMD64VPBROADCASTD128,
+ ssa.OpAMD64VPBROADCASTQ256,
ssa.OpAMD64VBROADCASTSS256,
- ssa.OpAMD64VBROADCASTSD256,
- ssa.OpAMD64VPBROADCASTB256,
- ssa.OpAMD64VPBROADCASTW256,
+ ssa.OpAMD64VBROADCASTSD512,
+ ssa.OpAMD64VPBROADCASTW128,
ssa.OpAMD64VPBROADCASTD256,
- ssa.OpAMD64VPBROADCASTQ256,
+ ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VBROADCASTSS512,
- ssa.OpAMD64VBROADCASTSD512,
- ssa.OpAMD64VPBROADCASTB512,
- ssa.OpAMD64VPBROADCASTW512,
+ ssa.OpAMD64VPBROADCASTB128,
+ ssa.OpAMD64VPBROADCASTW256,
ssa.OpAMD64VPBROADCASTD512,
- ssa.OpAMD64VPBROADCASTQ512,
+ ssa.OpAMD64VPBROADCASTB256,
+ ssa.OpAMD64VPBROADCASTW512,
+ ssa.OpAMD64VPBROADCASTB512,
ssa.OpAMD64VCVTPD2PSX128,
ssa.OpAMD64VCVTPD2PSY128,
ssa.OpAMD64VCVTPD2PS256,
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
- ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
- ssa.OpAMD64VPBROADCASTBMasked128,
- ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked256,
- ssa.OpAMD64VBROADCASTSDMasked256,
- ssa.OpAMD64VPBROADCASTBMasked256,
- ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
- ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
- ssa.OpAMD64VBROADCASTSDMasked512,
- ssa.OpAMD64VPBROADCASTBMasked512,
- ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
- ssa.OpAMD64VPBROADCASTQMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VPABSQMasked128Merging,
ssa.OpAMD64VPABSQMasked256Merging,
ssa.OpAMD64VPABSQMasked512Merging,
- ssa.OpAMD64VBROADCASTSSMasked128Merging,
ssa.OpAMD64VPBROADCASTQMasked128Merging,
- ssa.OpAMD64VPBROADCASTBMasked128Merging,
- ssa.OpAMD64VPBROADCASTWMasked128Merging,
+ ssa.OpAMD64VBROADCASTSSMasked128Merging,
+ ssa.OpAMD64VBROADCASTSDMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked128Merging,
+ ssa.OpAMD64VPBROADCASTQMasked256Merging,
ssa.OpAMD64VBROADCASTSSMasked256Merging,
- ssa.OpAMD64VBROADCASTSDMasked256Merging,
- ssa.OpAMD64VPBROADCASTBMasked256Merging,
- ssa.OpAMD64VPBROADCASTWMasked256Merging,
+ ssa.OpAMD64VBROADCASTSDMasked512Merging,
+ ssa.OpAMD64VPBROADCASTWMasked128Merging,
ssa.OpAMD64VPBROADCASTDMasked256Merging,
- ssa.OpAMD64VPBROADCASTQMasked256Merging,
+ ssa.OpAMD64VPBROADCASTQMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked512Merging,
- ssa.OpAMD64VBROADCASTSDMasked512Merging,
- ssa.OpAMD64VPBROADCASTBMasked512Merging,
- ssa.OpAMD64VPBROADCASTWMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked128Merging,
+ ssa.OpAMD64VPBROADCASTWMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked512Merging,
- ssa.OpAMD64VPBROADCASTQMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked256Merging,
+ ssa.OpAMD64VPBROADCASTWMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked512Merging,
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
ssa.OpAMD64VPAVGWMasked128,
ssa.OpAMD64VPAVGWMasked256,
ssa.OpAMD64VPAVGWMasked512,
- ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
- ssa.OpAMD64VPBROADCASTBMasked128,
- ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked256,
- ssa.OpAMD64VBROADCASTSDMasked256,
- ssa.OpAMD64VPBROADCASTBMasked256,
- ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
- ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
- ssa.OpAMD64VBROADCASTSDMasked512,
- ssa.OpAMD64VPBROADCASTBMasked512,
- ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
- ssa.OpAMD64VPBROADCASTQMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VRNDSCALEPSMasked128,
ssa.OpAMD64VRNDSCALEPSMasked128load,
ssa.OpAMD64VRNDSCALEPSMasked256,
(AverageUint16x8 ...) => (VPAVGW128 ...)
(AverageUint16x16 ...) => (VPAVGW256 ...)
(AverageUint16x32 ...) => (VPAVGW512 ...)
-(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
-(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
-(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
-(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
-(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
-(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
-(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...)
+(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...)
+(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...)
+(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...)
+(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...)
+(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...)
+(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...)
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
(VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask)
(VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask)
(VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask)
-(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
(VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask)
-(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
-(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
+(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
+(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
+(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
-(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
-(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
-(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
+(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
+(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
-(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
+(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
-(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
-(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
-(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
+(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
-(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
+(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
+(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
(VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask)
(VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask)
(VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask)
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
- {name: "Broadcast128Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast128Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To64Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
OpAverageUint16x8
OpAverageUint16x16
OpAverageUint16x32
- OpBroadcast128Float32x4
- OpBroadcast128Float64x2
- OpBroadcast128Int8x16
- OpBroadcast128Int16x8
- OpBroadcast128Int32x4
- OpBroadcast128Int64x2
- OpBroadcast128Uint8x16
- OpBroadcast128Uint16x8
- OpBroadcast128Uint32x4
- OpBroadcast128Uint64x2
- OpBroadcast256Float32x4
- OpBroadcast256Float64x2
- OpBroadcast256Int8x16
- OpBroadcast256Int16x8
- OpBroadcast256Int32x4
- OpBroadcast256Int64x2
- OpBroadcast256Uint8x16
- OpBroadcast256Uint16x8
- OpBroadcast256Uint32x4
- OpBroadcast256Uint64x2
- OpBroadcast512Float32x4
- OpBroadcast512Float64x2
- OpBroadcast512Int8x16
- OpBroadcast512Int16x8
- OpBroadcast512Int32x4
- OpBroadcast512Int64x2
- OpBroadcast512Uint8x16
- OpBroadcast512Uint16x8
- OpBroadcast512Uint32x4
- OpBroadcast512Uint64x2
+ OpBroadcast1To2Float64x2
+ OpBroadcast1To2Int64x2
+ OpBroadcast1To2Uint64x2
+ OpBroadcast1To4Float32x4
+ OpBroadcast1To4Float64x2
+ OpBroadcast1To4Int32x4
+ OpBroadcast1To4Int64x2
+ OpBroadcast1To4Uint32x4
+ OpBroadcast1To4Uint64x2
+ OpBroadcast1To8Float32x4
+ OpBroadcast1To8Float64x2
+ OpBroadcast1To8Int16x8
+ OpBroadcast1To8Int32x4
+ OpBroadcast1To8Int64x2
+ OpBroadcast1To8Uint16x8
+ OpBroadcast1To8Uint32x4
+ OpBroadcast1To8Uint64x2
+ OpBroadcast1To16Float32x4
+ OpBroadcast1To16Int8x16
+ OpBroadcast1To16Int16x8
+ OpBroadcast1To16Int32x4
+ OpBroadcast1To16Uint8x16
+ OpBroadcast1To16Uint16x8
+ OpBroadcast1To16Uint32x4
+ OpBroadcast1To32Int8x16
+ OpBroadcast1To32Int16x8
+ OpBroadcast1To32Uint8x16
+ OpBroadcast1To32Uint16x8
+ OpBroadcast1To64Int8x16
+ OpBroadcast1To64Uint8x16
OpCeilFloat32x4
OpCeilFloat32x8
OpCeilFloat64x2
generic: true,
},
{
- name: "Broadcast128Float32x4",
+ name: "Broadcast1To2Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Float64x2",
+ name: "Broadcast1To2Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int8x16",
+ name: "Broadcast1To2Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int16x8",
+ name: "Broadcast1To4Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int32x4",
+ name: "Broadcast1To4Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int64x2",
+ name: "Broadcast1To4Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint8x16",
+ name: "Broadcast1To4Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint16x8",
+ name: "Broadcast1To4Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint32x4",
+ name: "Broadcast1To4Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint64x2",
+ name: "Broadcast1To8Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Float32x4",
+ name: "Broadcast1To8Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Float64x2",
+ name: "Broadcast1To8Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int8x16",
+ name: "Broadcast1To8Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int16x8",
+ name: "Broadcast1To8Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int32x4",
+ name: "Broadcast1To8Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int64x2",
+ name: "Broadcast1To8Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint8x16",
+ name: "Broadcast1To8Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint16x8",
+ name: "Broadcast1To16Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint32x4",
+ name: "Broadcast1To16Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint64x2",
+ name: "Broadcast1To16Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Float32x4",
+ name: "Broadcast1To16Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Float64x2",
+ name: "Broadcast1To16Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int8x16",
+ name: "Broadcast1To16Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int16x8",
+ name: "Broadcast1To16Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int32x4",
+ name: "Broadcast1To32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int64x2",
+ name: "Broadcast1To32Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint8x16",
+ name: "Broadcast1To32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint16x8",
+ name: "Broadcast1To32Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint32x4",
+ name: "Broadcast1To64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint64x2",
+ name: "Broadcast1To64Uint8x16",
argLen: 1,
generic: true,
},
return rewriteValueAMD64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueAMD64_OpBitLen8(v)
- case OpBroadcast128Float32x4:
- v.Op = OpAMD64VBROADCASTSS128
+ case OpBroadcast1To16Float32x4:
+ v.Op = OpAMD64VBROADCASTSS512
return true
- case OpBroadcast128Float64x2:
- v.Op = OpAMD64VPBROADCASTQ128
+ case OpBroadcast1To16Int16x8:
+ v.Op = OpAMD64VPBROADCASTW256
return true
- case OpBroadcast128Int16x8:
- v.Op = OpAMD64VPBROADCASTW128
+ case OpBroadcast1To16Int32x4:
+ v.Op = OpAMD64VPBROADCASTD512
return true
- case OpBroadcast128Int32x4:
- v.Op = OpAMD64VPBROADCASTD128
+ case OpBroadcast1To16Int8x16:
+ v.Op = OpAMD64VPBROADCASTB128
return true
- case OpBroadcast128Int64x2:
- v.Op = OpAMD64VPBROADCASTQ128
+ case OpBroadcast1To16Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW256
return true
- case OpBroadcast128Int8x16:
+ case OpBroadcast1To16Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD512
+ return true
+ case OpBroadcast1To16Uint8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
- case OpBroadcast128Uint16x8:
- v.Op = OpAMD64VPBROADCASTW128
+ case OpBroadcast1To2Float64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint32x4:
- v.Op = OpAMD64VPBROADCASTD128
+ case OpBroadcast1To2Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint64x2:
+ case OpBroadcast1To2Uint64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint8x16:
- v.Op = OpAMD64VPBROADCASTB128
+ case OpBroadcast1To32Int16x8:
+ v.Op = OpAMD64VPBROADCASTW512
return true
- case OpBroadcast256Float32x4:
- v.Op = OpAMD64VBROADCASTSS256
+ case OpBroadcast1To32Int8x16:
+ v.Op = OpAMD64VPBROADCASTB256
return true
- case OpBroadcast256Float64x2:
- v.Op = OpAMD64VBROADCASTSD256
+ case OpBroadcast1To32Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW512
return true
- case OpBroadcast256Int16x8:
- v.Op = OpAMD64VPBROADCASTW256
+ case OpBroadcast1To32Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB256
return true
- case OpBroadcast256Int32x4:
- v.Op = OpAMD64VPBROADCASTD256
+ case OpBroadcast1To4Float32x4:
+ v.Op = OpAMD64VBROADCASTSS128
return true
- case OpBroadcast256Int64x2:
- v.Op = OpAMD64VPBROADCASTQ256
+ case OpBroadcast1To4Float64x2:
+ v.Op = OpAMD64VBROADCASTSD256
return true
- case OpBroadcast256Int8x16:
- v.Op = OpAMD64VPBROADCASTB256
+ case OpBroadcast1To4Int32x4:
+ v.Op = OpAMD64VPBROADCASTD128
return true
- case OpBroadcast256Uint16x8:
- v.Op = OpAMD64VPBROADCASTW256
+ case OpBroadcast1To4Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ256
return true
- case OpBroadcast256Uint32x4:
- v.Op = OpAMD64VPBROADCASTD256
+ case OpBroadcast1To4Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD128
return true
- case OpBroadcast256Uint64x2:
+ case OpBroadcast1To4Uint64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
- case OpBroadcast256Uint8x16:
- v.Op = OpAMD64VPBROADCASTB256
+ case OpBroadcast1To64Int8x16:
+ v.Op = OpAMD64VPBROADCASTB512
return true
- case OpBroadcast512Float32x4:
- v.Op = OpAMD64VBROADCASTSS512
+ case OpBroadcast1To64Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB512
+ return true
+ case OpBroadcast1To8Float32x4:
+ v.Op = OpAMD64VBROADCASTSS256
return true
- case OpBroadcast512Float64x2:
+ case OpBroadcast1To8Float64x2:
v.Op = OpAMD64VBROADCASTSD512
return true
- case OpBroadcast512Int16x8:
- v.Op = OpAMD64VPBROADCASTW512
+ case OpBroadcast1To8Int16x8:
+ v.Op = OpAMD64VPBROADCASTW128
return true
- case OpBroadcast512Int32x4:
- v.Op = OpAMD64VPBROADCASTD512
+ case OpBroadcast1To8Int32x4:
+ v.Op = OpAMD64VPBROADCASTD256
return true
- case OpBroadcast512Int64x2:
+ case OpBroadcast1To8Int64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
- case OpBroadcast512Int8x16:
- v.Op = OpAMD64VPBROADCASTB512
- return true
- case OpBroadcast512Uint16x8:
- v.Op = OpAMD64VPBROADCASTW512
+ case OpBroadcast1To8Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW128
return true
- case OpBroadcast512Uint32x4:
- v.Op = OpAMD64VPBROADCASTD512
+ case OpBroadcast1To8Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD256
return true
- case OpBroadcast512Uint64x2:
+ case OpBroadcast1To8Uint64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
- case OpBroadcast512Uint8x16:
- v.Op = OpAMD64VPBROADCASTB512
- return true
case OpBswap16:
return rewriteValueAMD64_OpBswap16(v)
case OpBswap32:
addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
documentation: !string |-
// NAME performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
-- go: Broadcast128
+- go: Broadcast1To2
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 128-bit output vector.
-- go: Broadcast256
+ // NAME copies the lowest element of its input to all 2 elements of
+ // the output vector.
+- go: Broadcast1To4
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 256-bit output vector.
-- go: Broadcast512
+ // NAME copies the lowest element of its input to all 4 elements of
+ // the output vector.
+- go: Broadcast1To8
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 512-bit output vector.
+ // NAME copies the lowest element of its input to all 8 elements of
+ // the output vector.
+- go: Broadcast1To16
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 16 elements of
+ // the output vector.
+- go: Broadcast1To32
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 32 elements of
+ // the output vector.
+- go: Broadcast1To64
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 64 elements of
+ // the output vector.
- go: PermuteOrZeroGrouped
commutative: false
documentation: !string |- # Detailed documentation will rely on the specific ops.
out:
- *any
-- go: Broadcast128
- asm: VPBROADCAST[BWDQ]
+- go: Broadcast1To2
+ asm: VPBROADCASTQ
in:
- class: vreg
bits: 128
- elemBits: $e
+ elemBits: 64
base: $b
out:
- class: vreg
bits: 128
- elemBits: $e
+ elemBits: 64
base: $b
# Weirdly, this one case on AVX2 is memory-operand-only, so the entry below reuses the integer VPBROADCASTQ form and overwrites its base to float.
-- go: Broadcast128
+- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
base: int
OverwriteBase: float
-- go: Broadcast256
+- go: Broadcast1To4
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 256
- elemBits: $e
+ lanes: 4
base: $b
-- go: Broadcast512
+- go: Broadcast1To8
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 512
- elemBits: $e
+ lanes: 8
base: $b
-- go: Broadcast128
- asm: VBROADCASTS[SD]
+- go: Broadcast1To16
+ asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 128
- elemBits: $e
+ lanes: 16
base: $b
-- go: Broadcast256
- asm: VBROADCASTS[SD]
+- go: Broadcast1To32
+ asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 256
- elemBits: $e
+ lanes: 32
base: $b
-- go: Broadcast512
- asm: VBROADCASTS[SD]
+- go: Broadcast1To64
+ asm: VPBROADCASTB
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 512
- elemBits: $e
+ lanes: 64
base: $b
+- go: Broadcast1To4
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 4
+ base: float
+
+- go: Broadcast1To8
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 8
+ base: float
+
+- go: Broadcast1To16
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 16
+ base: float
+
# For 128-bit byte shuffles, VPSHUFB is picked with higher priority than VPERMB because of its lower CPU feature requirement (VPSHUFB only needs AVX).
- go: PermuteOrZero
asm: VPSHUFB
// Emulated, CPU Feature: {{.CPUfeatureBC}}
func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
var z {{.As128BitVec }}
- return z.SetElem(0, x).Broadcast{{.Vwidth}}()
+ return z.SetElem(0, x).Broadcast1To{{.Count}}()
}
`)
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) Average(y Uint16x32) Uint16x32
-/* Broadcast128 */
+/* Broadcast1To2 */
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
-// Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast128() Float32x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To2() Float64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Float64x2) Broadcast128() Float64x2
+func (x Int64x2) Broadcast1To2() Int64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast128() Int8x16
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Uint64x2) Broadcast1To2() Uint64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast128() Int16x8
+/* Broadcast1To4 */
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast128() Int32x4
+// Asm: VBROADCASTSS, CPU Feature: AVX2
+func (x Float32x4) Broadcast1To4() Float32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast128() Int64x2
+// Asm: VBROADCASTSD, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To4() Float64x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast128() Uint8x16
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Int32x4) Broadcast1To4() Int32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast128() Uint16x8
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Int64x2) Broadcast1To4() Int64x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast128() Uint32x4
+func (x Uint32x4) Broadcast1To4() Uint32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast128() Uint64x2
+func (x Uint64x2) Broadcast1To4() Uint64x4
-/* Broadcast256 */
+/* Broadcast1To8 */
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast256() Float32x8
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX2
-func (x Float64x2) Broadcast256() Float64x4
+func (x Float32x4) Broadcast1To8() Float32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast256() Int8x32
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast1To8() Float64x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast256() Int16x16
+func (x Int16x8) Broadcast1To8() Int16x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast256() Int32x8
+func (x Int32x4) Broadcast1To8() Int32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast256() Int64x4
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast256() Uint8x32
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast1To8() Int64x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast256() Uint16x16
+func (x Uint16x8) Broadcast1To8() Uint16x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast256() Uint32x8
+func (x Uint32x4) Broadcast1To8() Uint32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast256() Uint64x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast1To8() Uint64x8
-/* Broadcast512 */
+/* Broadcast1To16 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512() Float32x16
+func (x Float32x4) Broadcast1To16() Float32x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512() Float64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To16() Int8x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512() Int8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Int16x8) Broadcast1To16() Int16x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512() Int16x32
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast1To16() Int32x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To16() Uint8x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Uint16x8) Broadcast1To16() Uint16x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512() Int32x16
+func (x Uint32x4) Broadcast1To16() Uint32x16
+
+/* Broadcast1To32 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512() Int64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To32() Int8x32
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast512() Uint8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast1To32() Int16x32
+
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To32() Uint8x32
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast512() Uint16x32
+func (x Uint16x8) Broadcast1To32() Uint16x32
+
+/* Broadcast1To64 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast512() Uint32x16
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast1To64() Int8x64
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast512() Uint64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast1To64() Uint8x64
/* Ceil */
// Emulated, CPU Feature: AVX2
func BroadcastInt8x16(x int8) Int8x16 {
var z Int8x16
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt16x8 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt16x8(x int16) Int16x8 {
var z Int16x8
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt32x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt32x4(x int32) Int32x4 {
var z Int32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt64x2 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt64x2(x int64) Int64x2 {
var z Int64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastUint8x16 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint8x16(x uint8) Uint8x16 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint16x8 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint16x8(x uint16) Uint16x8 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint32x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint32x4(x uint32) Uint32x4 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint64x2 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint64x2(x uint64) Uint64x2 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastFloat32x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x4(x float32) Float32x4 {
var z Float32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat64x2 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x2(x float64) Float64x2 {
var z Float64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastInt8x32 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt8x32(x int8) Int8x32 {
var z Int8x16
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt16x16 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt16x16(x int16) Int16x16 {
var z Int16x8
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt32x8 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt32x8(x int32) Int32x8 {
var z Int32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt64x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastInt64x4(x int64) Int64x4 {
var z Int64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint8x32 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint8x32(x uint8) Uint8x32 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint16x16 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint16x16(x uint16) Uint16x16 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint32x8 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint32x8(x uint32) Uint32x8 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint64x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastUint64x4(x uint64) Uint64x4 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat32x8 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x8(x float32) Float32x8 {
var z Float32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat64x4 returns a vector with the input
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x4(x float64) Float64x4 {
var z Float64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt8x64 returns a vector with the input
// Emulated, CPU Feature: AVX512BW
func BroadcastInt8x64(x int8) Int8x64 {
var z Int8x16
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastInt16x32 returns a vector with the input
// Emulated, CPU Feature: AVX512BW
func BroadcastInt16x32(x int16) Int16x32 {
var z Int16x8
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt32x16 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastInt32x16(x int32) Int32x16 {
var z Int32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt64x8 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastInt64x8(x int64) Int64x8 {
var z Int64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint8x64 returns a vector with the input
// Emulated, CPU Feature: AVX512BW
func BroadcastUint8x64(x uint8) Uint8x64 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastUint16x32 returns a vector with the input
// Emulated, CPU Feature: AVX512BW
func BroadcastUint16x32(x uint16) Uint16x32 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint32x16 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastUint32x16(x uint32) Uint32x16 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint64x8 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastUint64x8(x uint64) Uint64x8 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat32x16 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastFloat32x16(x float32) Float32x16 {
var z Float32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastFloat64x8 returns a vector with the input
// Emulated, CPU Feature: AVX512F
func BroadcastFloat64x8(x float64) Float64x8 {
var z Float64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero.