// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x16) AbsoluteMasked(y Mask8x16) Int8x16
+func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x32) AbsoluteMasked(y Mask8x32) Int8x32
+func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x64) AbsoluteMasked(y Mask8x64) Int8x64
+func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x8) AbsoluteMasked(y Mask16x8) Int16x8
+func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x16) AbsoluteMasked(y Mask16x16) Int16x16
+func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x32) AbsoluteMasked(y Mask16x32) Int16x32
+func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x4) AbsoluteMasked(y Mask32x4) Int32x4
+func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x8) AbsoluteMasked(y Mask32x8) Int32x8
+func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x16) AbsoluteMasked(y Mask32x16) Int32x16
+func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x2) AbsoluteMasked(y Mask64x2) Int64x2
+func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x4) AbsoluteMasked(y Mask64x4) Int64x4
+func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4
// AbsoluteMasked computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x8) AbsoluteMasked(y Mask64x8) Int64x8
+func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8
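// Example (illustrative sketch, not part of the declarations above): applying
// AbsoluteMasked under a caller-supplied mask. This assumes the experimental
// simd package (import "simd" under GOEXPERIMENT=simd) and Load/Store slice
// helpers such as LoadInt16x8Slice and StoreSlice; lanes not selected by m
// follow the package's masking rule.
func absLanes(dst, src []int16, m simd.Mask16x8) {
	v := simd.LoadInt16x8Slice(src)     // eight int16 lanes of src
	v.AbsoluteMasked(m).StoreSlice(dst) // |v| in the selected lanes
}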
/* Add */
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x4) AddMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x8) AddMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x16) AddMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x2) AddMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x4) AddMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x8) AddMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x16) AddMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x32) AddMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x64) AddMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x8) AddMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x16) AddMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x32) AddMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x4) AddMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x8) AddMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x16) AddMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x2) AddMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x4) AddMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x8) AddMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x16) AddMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x32) AddMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x64) AddMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x8) AddMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x16) AddMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x32) AddMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x4) AddMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x8) AddMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x16) AddMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x2) AddMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x4) AddMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4
// AddMasked adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x8) AddMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
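// Example (sketch): masked addition over full slices, processed in eight-lane
// blocks. LoadFloat32x8Slice and StoreSlice are the same assumed helpers as
// above; len(a), len(b), and len(dst) are taken to be equal multiples of 8.
func addMaskedBlocks(dst, a, b []float32, m simd.Mask32x8) {
	for i := 0; i+8 <= len(a); i += 8 {
		x := simd.LoadFloat32x8Slice(a[i : i+8])
		y := simd.LoadFloat32x8Slice(b[i : i+8])
		x.AddMasked(y, m).StoreSlice(dst[i : i+8]) // x+y where m selects
	}
}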
/* AddSub */
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x4) AndMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x8) AndMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x16) AndMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x2) AndMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x4) AndMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x8) AndMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x4) AndMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x8) AndMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x16) AndMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x2) AndMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x4) AndMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4
// AndMasked performs a masked bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x8) AndMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
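// Example (sketch): AndMasked as a lane-selective bit filter, here keeping
// only the low nibble of the selected lanes. The constant operand is built
// with the assumed LoadUint32x4Slice helper for lack of a confirmed broadcast
// constructor.
func lowNibbles(dst, src []uint32, m simd.Mask32x4) {
	nib := simd.LoadUint32x4Slice([]uint32{0xF, 0xF, 0xF, 0xF})
	simd.LoadUint32x4Slice(src).AndMasked(nib, m).StoreSlice(dst)
}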
/* AndNot */
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x4) AndNotMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x8) AndNotMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x16) AndNotMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x2) AndNotMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x4) AndNotMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x8) AndNotMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x4) AndNotMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x8) AndNotMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x16) AndNotMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x2) AndNotMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x4) AndNotMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x8) AndNotMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
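// Example (sketch): AndNotMasked for clearing flag bits in selected lanes.
// The declarations above do not say which operand VPANDN complements, so the
// operand order here is an assumption to verify against the package docs.
func clearFlags(dst, words, flags []uint64, m simd.Mask64x2) {
	w := simd.LoadUint64x2Slice(words)
	f := simd.LoadUint64x2Slice(flags)
	w.AndNotMasked(f, m).StoreSlice(dst) // w &^ f in the selected lanes (assumed order)
}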
/* ApproximateReciprocal */
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalMasked(y Mask32x4) Float32x4
+func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalMasked(y Mask32x8) Float32x8
+func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalMasked(y Mask32x16) Float32x16
+func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalMasked(y Mask64x2) Float64x2
+func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalMasked(y Mask64x4) Float64x4
+func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4
// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalMasked(y Mask64x8) Float64x8
+func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8
/* ApproximateReciprocalOfSqrt */
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalOfSqrtMasked(y Mask32x4) Float32x4
+func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalOfSqrtMasked(y Mask32x8) Float32x8
+func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalOfSqrtMasked(y Mask32x16) Float32x16
+func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalOfSqrtMasked(y Mask64x2) Float64x2
+func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalOfSqrtMasked(y Mask64x4) Float64x4
+func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4
// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalOfSqrtMasked(y Mask64x8) Float64x8
+func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8
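// Example (sketch): the VRCP14/VRSQRT14 instructions behind these two
// families guarantee a relative error of at most 2^-14, so they suit
// speed-over-precision paths. Helpers are assumed as in the earlier sketches.
func approxInvSqrt(dst, x []float32, m simd.Mask32x4) {
	v := simd.LoadFloat32x4Slice(x)
	v.ApproximateReciprocalOfSqrtMasked(m).StoreSlice(dst) // ~1/sqrt(x) per selected lane
}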
/* Average */
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x16) AverageMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x32) AverageMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x64) AverageMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x8) AverageMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x16) AverageMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
// AverageMasked computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x32) AverageMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
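// Example (sketch): VPAVGB computes (a + b + 1) >> 1 in a widened
// intermediate, so the rounded byte average never overflows; a typical use is
// 50% image blending. Load/Store helpers are assumed as before.
func blendHalf(dst, a, b []uint8, m simd.Mask8x16) {
	x := simd.LoadUint8x16Slice(a)
	y := simd.LoadUint8x16Slice(b)
	x.AverageMasked(y, m).StoreSlice(dst) // rounded (a+b)/2 where m selects
}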
/* Ceil */
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) CeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) CeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) CeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) CeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) CeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// CeilWithPrecisionMasked rounds elements up with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) CeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
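// Example (sketch): for VRNDSCALE the prec immediate selects a granularity of
// 2^-prec, i.e. binary rather than decimal digits; that reading is inferred
// from the instruction, not stated above. With prec = 3 the selected lanes
// are rounded up to the next multiple of 1/8.
func ceilEighths(dst, x []float32, m simd.Mask32x4) {
	simd.LoadFloat32x4Slice(x).CeilWithPrecisionMasked(3, m).StoreSlice(dst)
}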
/* DiffWithCeilWithPrecision */
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* DiffWithFloorWithPrecision */
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* DiffWithRoundWithPrecision */
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* DiffWithTruncWithPrecision */
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
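// Example (sketch): the four DiffWith*WithPrecisionMasked families map to
// VREDUCE, which yields x minus x rounded at the given precision. With the
// Trunc variant and prec = 0 that is exactly the fractional part. This
// interpretation comes from the instruction's definition, not the comments
// above.
func fracPart(dst, x []float64, m simd.Mask64x4) {
	v := simd.LoadFloat64x4Slice(x)
	v.DiffWithTruncWithPrecisionMasked(0, m).StoreSlice(dst) // x - trunc(x) per selected lane
}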
/* Div */
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x4) DivMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x8) DivMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x16) DivMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x2) DivMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x4) DivMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4
// DivMasked divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x8) DivMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8
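// Example (sketch): a mask built from a comparison keeps the division away
// from zero divisors. NotEqual is an assumed unmasked comparison in the
// experimental package; what lands in the suppressed lanes of the result is
// up to the package's masking rule.
func safeDiv(dst, a, b []float64) {
	var zeros [2]float64
	zero := simd.LoadFloat64x2Slice(zeros[:])
	y := simd.LoadFloat64x2Slice(b)
	nz := y.NotEqual(zero) // lanes with a nonzero divisor
	simd.LoadFloat64x2Slice(a).DivMasked(y, nz).StoreSlice(dst)
}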
/* DotProdBroadcast */
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) EqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) EqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) EqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) EqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) EqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4
// EqualMasked compares for equality, masked.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) EqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) EqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) EqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) EqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) EqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) EqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) EqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) EqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) EqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) EqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) EqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) EqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) EqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) EqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) EqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) EqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) EqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) EqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) EqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) EqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) EqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) EqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) EqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) EqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
// EqualMasked compares for equality, masked.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) EqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
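// Example (sketch): because masked comparisons take a mask and return one,
// they chain like a logical AND of conditions, assuming suppressed lanes
// compare false. Here equality is tested only in lanes already marked valid.
func matchWhereValid(a, b []int32, valid simd.Mask32x4) simd.Mask32x4 {
	x := simd.LoadInt32x4Slice(a)
	y := simd.LoadInt32x4Slice(b)
	return x.EqualMasked(y, valid) // true only where valid and a == b
}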
/* Floor */
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) FloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) FloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) FloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) FloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) FloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// FloorWithPrecisionMasked rounds elements down with specified precision, masked.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) FloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
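// Example (sketch): the flooring counterpart of the ceiling sketch earlier,
// under the same assumed 2^-prec granularity; prec = 1 rounds the selected
// lanes down to multiples of 1/2.
func floorHalves(dst, x []float32, m simd.Mask32x8) {
	simd.LoadFloat32x8Slice(x).FloorWithPrecisionMasked(1, m).StoreSlice(dst)
}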
/* FusedMultiplyAdd */
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
//
// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
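// Example (sketch): a masked fused multiply-add step, computing x*y + acc
// with a single rounding. The receiver/argument order follows the
// `(v1 * v2) + v3` convention documented above; Load/Store helpers remain
// assumptions.
func fmaStep(dst, x, y, acc []float64, m simd.Mask64x4) {
	vx := simd.LoadFloat64x4Slice(x)
	vy := simd.LoadFloat64x4Slice(y)
	va := simd.LoadFloat64x4Slice(acc)
	vx.FusedMultiplyAddMasked(vy, va, m).StoreSlice(dst) // x*y + acc where m selects
}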
/* FusedMultiplyAddSub */
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
//
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
/* FusedMultiplySubAdd */
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
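// Example (sketch): the AddSub/SubAdd variants alternate addition and
// subtraction between even- and odd-indexed lanes, per the comments above,
// which is the access pattern used for interleaved real/imaginary data. This
// sketch only wires the call together; check the lane convention quoted in
// the docs before relying on which parity adds.
func mulAddSub(dst, x, y, z []float32, m simd.Mask32x4) {
	vx := simd.LoadFloat32x4Slice(x)
	vy := simd.LoadFloat32x4Slice(y)
	vz := simd.LoadFloat32x4Slice(z)
	vx.FusedMultiplyAddSubMasked(vy, vz, m).StoreSlice(dst)
}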
/* GaloisFieldAffineTransform */
// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16
// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32
// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
//
// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
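// Example (sketch): GF(2^8) multiplication under the AES reduction polynomial
// 0x11B (x^8 + x^4 + x^3 + x + 1, as documented above), the building block of
// MixColumns-style byte transforms. Load/Store helpers are assumed as before.
func gfMul(dst, a, b []uint8, m simd.Mask8x16) {
	x := simd.LoadUint8x16Slice(a)
	y := simd.LoadUint8x16Slice(b)
	x.GaloisFieldMulMasked(y, m).StoreSlice(dst) // carry-less byte product mod 0x11B
}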
/* Get128 */
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) GreaterEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) GreaterEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) GreaterEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) GreaterEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) GreaterEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) GreaterEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) GreaterEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) GreaterEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) GreaterEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) GreaterEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) GreaterEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) GreaterEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) GreaterEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) GreaterEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) GreaterEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) GreaterEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) GreaterEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) GreaterEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) GreaterEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) GreaterEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) GreaterEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) GreaterEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) GreaterEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) GreaterEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) GreaterEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) GreaterEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) GreaterEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) GreaterEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) GreaterEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
// GreaterEqualMasked compares for greater than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) GreaterEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
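// Example (sketch): a lane-wise range test built by chaining comparisons.
// LessEqual is an assumed unmasked comparison; feeding its result into
// GreaterEqualMasked leaves set only the lanes satisfying both bounds, again
// assuming suppressed lanes compare false.
func inRange(x, lo, hi []int32) simd.Mask32x4 {
	vx := simd.LoadInt32x4Slice(x)
	le := vx.LessEqual(simd.LoadInt32x4Slice(hi)) // x <= hi
	return vx.GreaterEqualMasked(simd.LoadInt32x4Slice(lo), le) // and x >= lo
}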
/* GreaterMasked */
// GreaterMasked compares for greater than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) GreaterMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4
// GreaterMasked compares for greater than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) GreaterMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8
// GreaterMasked compares for greater than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) GreaterMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16
// GreaterMasked compares for greater than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) GreaterMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2
// GreaterMasked compares for greater than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) GreaterMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4
// GreaterMasked compares for greater than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) GreaterMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) GreaterMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) GreaterMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32
// GreaterMasked compares for greater than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) GreaterMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64
// GreaterMasked compares for greater than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) GreaterMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) GreaterMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) GreaterMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32
// GreaterMasked compares for greater than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) GreaterMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4
// GreaterMasked compares for greater than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) GreaterMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) GreaterMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) GreaterMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2
// GreaterMasked compares for greater than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) GreaterMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4
// GreaterMasked compares for greater than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) GreaterMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) GreaterMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) GreaterMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) GreaterMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) GreaterMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) GreaterMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) GreaterMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) GreaterMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) GreaterMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) GreaterMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) GreaterMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) GreaterMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4
// GreaterMasked compares for greater than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) GreaterMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8
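Because a masked compare returns a mask, range tests chain without branches; another sketch under the same assumed import (helper name is illustrative):

	// inRange computes lo < v && v < hi lane-wise: the first compare
	// produces the mask that gates the second.
	func inRange(v, lo, hi simd.Float64x4, m simd.Mask64x4) simd.Mask64x4 {
		gt := v.GreaterMasked(lo, m)   // lanes where v > lo
		return hi.GreaterMasked(v, gt) // of those, lanes where hi > v
	}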
/* IsNan */
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) IsNanMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) IsNanMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) IsNanMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) IsNanMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) IsNanMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4
// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) IsNanMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8
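The self-compare idiom from the doc comment looks like this in practice (same assumed import; helper name is mine):

	// nanLanes flags the NaN lanes of v, restricted to m; NaN compares
	// unequal to everything, including itself, so x.IsNanMasked(x, m) works.
	func nanLanes(v simd.Float32x8, m simd.Mask32x8) simd.Mask32x8 {
		return v.IsNanMasked(v, m)
	}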
/* LessEqual */
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) LessEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) LessEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) LessEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) LessEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) LessEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) LessEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) LessEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) LessEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) LessEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) LessEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) LessEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) LessEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) LessEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) LessEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) LessEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) LessEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) LessEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) LessEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) LessEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) LessEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) LessEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) LessEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) LessEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) LessEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) LessEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) LessEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) LessEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) LessEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) LessEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
// LessEqualMasked compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) LessEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
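The unsigned shapes lower to the unsigned compare forms (VPCMPUB/VPCMPUW/VPCMPUD/VPCMPUQ), so ordering is by magnitude rather than two's complement; a sketch under the same assumed import:

	// leLanes: a <= b per lane as an unsigned 16-bit compare (VPCMPUW).
	func leLanes(a, b simd.Uint16x8, m simd.Mask16x8) simd.Mask16x8 {
		return a.LessEqualMasked(b, m)
	}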
/* Less */
// LessMasked compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) LessMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4
// LessMasked compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) LessMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8
// LessMasked compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) LessMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16
// LessMasked compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) LessMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2
// LessMasked compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) LessMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4
// LessMasked compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) LessMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8
// LessMasked compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) LessMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16
// LessMasked compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) LessMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32
// LessMasked compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) LessMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64
// LessMasked compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) LessMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8
// LessMasked compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) LessMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16
// LessMasked compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) LessMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32
// LessMasked compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) LessMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4
// LessMasked compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) LessMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8
// LessMasked compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) LessMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16
// LessMasked compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) LessMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2
// LessMasked compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) LessMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4
// LessMasked compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) LessMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8
// LessMasked compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) LessMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16
// LessMasked compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) LessMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32
// LessMasked compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) LessMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64
// LessMasked compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) LessMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8
// LessMasked compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) LessMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16
// LessMasked compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) LessMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32
// LessMasked compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) LessMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4
// LessMasked compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) LessMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8
// LessMasked compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) LessMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16
// LessMasked compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) LessMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2
// LessMasked compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) LessMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4
// LessMasked compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) LessMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8
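LessMasked is the mirror of GreaterMasked with operands swapped; for completeness (same assumed import):

	// ltLanes: lanes where a < b, i.e. the same mask as b.GreaterMasked(a, m).
	func ltLanes(a, b simd.Int8x16, m simd.Mask8x16) simd.Mask8x16 {
		return a.LessMasked(b, m)
	}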
/* Max */
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x4) MaxMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x8) MaxMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x16) MaxMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x2) MaxMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x4) MaxMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x8) MaxMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x16) MaxMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x32) MaxMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x64) MaxMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x8) MaxMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x16) MaxMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x32) MaxMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x4) MaxMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x8) MaxMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x16) MaxMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x2) MaxMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x4) MaxMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x8) MaxMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x16) MaxMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x32) MaxMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x64) MaxMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x8) MaxMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x16) MaxMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x32) MaxMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x4) MaxMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x8) MaxMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x16) MaxMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x2) MaxMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x4) MaxMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4
// MaxMasked computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x8) MaxMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8
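A masked max doubles as a conditional clamp. One caveat worth hedging: this listing does not say whether masked-off lanes of a value result are zeroed or merged, so the sketch relies only on the selected lanes:

	// clampFloor raises the selected lanes of v to at least floor (VMAXPS);
	// treat the unselected lanes of the result as unspecified here.
	func clampFloor(v, floor simd.Float32x16, m simd.Mask32x16) simd.Float32x16 {
		return v.MaxMasked(floor, m)
	}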
/* Min */
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x4) MinMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x8) MinMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x16) MinMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x2) MinMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x4) MinMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x8) MinMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x16) MinMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x32) MinMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x64) MinMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x8) MinMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x16) MinMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x32) MinMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x4) MinMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x8) MinMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x16) MinMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x2) MinMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x4) MinMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x8) MinMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x16) MinMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x32) MinMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x64) MinMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x8) MinMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x16) MinMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x32) MinMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x4) MinMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x8) MinMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x16) MinMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x2) MinMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x4) MinMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4
// MinMasked computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x8) MinMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8
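The symmetric masked min gives the upper clamp (same caveat about unselected lanes):

	// clampCeil lowers the selected lanes of v to at most ceil (VPMINSD).
	func clampCeil(v, ceil simd.Int32x8, m simd.Mask32x8) simd.Int32x8 {
		return v.MinMasked(ceil, m)
	}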
/* MulByPowOf2 */
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x4) MulByPowOf2Masked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x8) MulByPowOf2Masked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x16) MulByPowOf2Masked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x2) MulByPowOf2Masked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x4) MulByPowOf2Masked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4
// MulByPowOf2Masked multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x8) MulByPowOf2Masked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8
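VSCALEFPS/VSCALEFPD compute x * 2**y with the exponents supplied as float lanes, so this behaves like an exact, branch-free per-lane ldexp; a sketch:

	// scale computes v * 2**e per selected lane (VSCALEFPS).
	func scale(v, e simd.Float32x4, m simd.Mask32x4) simd.Float32x4 {
		return v.MulByPowOf2Masked(e, m)
	}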
/* MulEvenWiden */
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x2) MulEvenWidenMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x4) MulEvenWidenMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x8) MulEvenWidenMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4
// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8
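The widening multiply reads only the even-indexed source elements and produces full-width products, per the Result[i] formula above; a sketch (same assumed import):

	// evenProducts: Result[i] = a.Even[i] * b.Even[i] (VPMULDQ),
	// widened so the products cannot overflow.
	func evenProducts(a, b simd.Int64x2, m simd.Mask64x2) simd.Int64x2 {
		return a.MulEvenWidenMasked(b, m)
	}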
/* MulHigh */
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x8) MulHighMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x16) MulHighMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x32) MulHighMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x8) MulHighMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x16) MulHighMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16
// MulHighMasked multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x32) MulHighMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
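MulHigh keeps the bits an ordinary multiply discards; together with MulLow (next group) the two halves reconstruct the full 32-bit product of 16-bit lanes:

	// mulHigh returns the upper 16 bits of each 16x16->32 product (VPMULHW).
	func mulHigh(a, b simd.Int16x8, m simd.Mask16x8) simd.Int16x8 {
		return a.MulHighMasked(b, m)
	}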
/* MulLow */
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x8) MulLowMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x16) MulLowMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x32) MulLowMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x4) MulLowMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x8) MulLowMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x16) MulLowMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x2) MulLowMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x4) MulLowMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4
// MulLowMasked multiplies elements and stores the low part of the result, masked.
//
// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x8) MulLowMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8
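MulLow is ordinary wrap-around multiplication, keeping the low half of each product:

	// mulLow: a * b per lane, truncated to the lane width (VPMULLD).
	func mulLow(a, b simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return a.MulLowMasked(b, m)
	}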
/* Mul */
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x4) MulMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x8) MulMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x16) MulMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x2) MulMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x4) MulMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
// MulMasked multiplies corresponding elements of two vectors, masked.
//
// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x8) MulMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
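For floats there is no high/low split; MulMasked is the plain lane-wise product:

	// mulSel multiplies the selected lanes (VMULPD).
	func mulSel(a, b simd.Float64x2, m simd.Mask64x2) simd.Float64x2 {
		return a.MulMasked(b, m)
	}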
/* NotEqual */
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) NotEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) NotEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) NotEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) NotEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) NotEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
// NotEqualMasked compares for inequality.
//
// Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) NotEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) NotEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) NotEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) NotEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) NotEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) NotEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) NotEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) NotEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) NotEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) NotEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) NotEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) NotEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) NotEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) NotEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) NotEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) NotEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) NotEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) NotEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) NotEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) NotEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) NotEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) NotEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) NotEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) NotEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
// NotEqualMasked compares for inequality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) NotEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
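One use of a masked inequality is change detection between two vectors (same assumed import; helper name is mine):

	// changed flags lanes where a and b differ, within m.
	func changed(a, b simd.Uint64x2, m simd.Mask64x2) simd.Mask64x2 {
		return a.NotEqualMasked(b, m)
	}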
/* Or */
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x4) OrMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x8) OrMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x16) OrMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x2) OrMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x4) OrMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x8) OrMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x4) OrMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x8) OrMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x16) OrMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x2) OrMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x4) OrMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
// OrMasked performs a masked bitwise OR operation between two vectors.
//
// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x8) OrMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
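Only 32- and 64-bit element shapes are listed because the OR itself is bitwise; the element width matters only for mask granularity (VPORD vs VPORQ). A sketch:

	// orSel ORs the selected lanes of two bit sets (VPORD).
	func orSel(a, b simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
		return a.OrMasked(b, m)
	}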
/* PairDotProdAccumulate */
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results into x.
//
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
+func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results into x.
//
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
+func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results into x.
//
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
+func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
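In Go terms, VPDPWSSD implements the accumulate step of an int16 dot product; a sketch, with the formula taken from the doc comment above:

	// dotAccum: acc[i] += y[2i]*z[2i] + y[2i+1]*z[2i+1] for the lanes in m.
	func dotAccum(acc simd.Int32x4, y, z simd.Int16x8, m simd.Mask32x4) simd.Int32x4 {
		return acc.PairDotProdAccumulateMasked(y, z, m)
	}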
/* PairDotProd */
// PairDotProdMasked multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x8) PairDotProdMasked(y Int16x8, z Mask16x8) Int32x4
+func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4
// PairDotProdMasked multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x16) PairDotProdMasked(y Int16x16, z Mask16x16) Int32x8
+func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8
// PairDotProdMasked multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProdMasked(y Int16x32, z Mask16x32) Int32x16
+func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16
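The non-accumulating form changes shape: n 16-bit lanes in, n/2 32-bit lanes out:

	// pairDot multiplies adjacent pairs and sums each pair (VPMADDWD).
	func pairDot(a, b simd.Int16x16, m simd.Mask16x16) simd.Int32x8 {
		return a.PairDotProdMasked(b, m)
	}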
/* Permute2 */
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16
+func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8x16
+func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32
+func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8x32
+func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64
+func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8x64
+func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8
+func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int16x16
+func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int16x32
+func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Float32x4
+func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4
+func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Float32x8
+func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8
+func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16) Float32x16
+func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int32x16
+func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Float64x2
+func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2
+func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Float64x4
+func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4
+func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Float64x8
+func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8
+func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8
// Permute2Masked performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// Only the needed bits to represent xy's index are used in indices' elements.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8
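// Example (illustrative sketch, not part of the API diff): Permute2Masked as
// a two-vector table lookup. Assumes GOEXPERIMENT=simd and `import "simd"`;
// lookup32 is an invented name.
//
//	// lookup32 treats lo and hi as one 32-byte table xy and returns
//	// xy[idx[i]] in each lane selected by mask.
//	func lookup32(lo, hi, idx simd.Uint8x16, mask simd.Mask8x16) simd.Uint8x16 {
//		return lo.Permute2Masked(hi, idx, mask)
//	}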
/* PermuteMasked */
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Int8x16
+func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Int8x32
+func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Int8x64
+func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Int16x8
+func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Int16x16
+func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Int16x32
+func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPS, CPU Feature: AVX512F
-func (x Float32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Float32x8
+func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
-func (x Int32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Int32x8
+func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
-func (x Uint32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPS, CPU Feature: AVX512F
-func (x Float32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Float32x16
+func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
-func (x Int32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Int32x16
+func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMD, CPU Feature: AVX512F
-func (x Uint32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Float64x4
+func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Int64x4
+func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Float64x8
+func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Int64x8
+func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
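// Example (illustrative sketch, not part of the API diff): PermuteMasked is a
// single-vector shuffle. Assumes GOEXPERIMENT=simd and `import "simd"`;
// shuffle is an invented name.
//
//	// shuffle returns x[idx[i]] in each lane selected by mask.
//	func shuffle(x simd.Int32x8, idx simd.Uint32x8, mask simd.Mask32x8) simd.Int32x8 {
//		return x.PermuteMasked(idx, mask)
//	}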
/* PopCount */
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) PopCountMasked(y Mask8x16) Int8x16
+func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) PopCountMasked(y Mask8x32) Int8x32
+func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) PopCountMasked(y Mask8x64) Int8x64
+func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) PopCountMasked(y Mask16x8) Int16x8
+func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) PopCountMasked(y Mask16x16) Int16x16
+func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) PopCountMasked(y Mask16x32) Int16x32
+func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) PopCountMasked(y Mask32x4) Int32x4
+func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) PopCountMasked(y Mask32x8) Int32x8
+func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) PopCountMasked(y Mask32x16) Int32x16
+func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) PopCountMasked(y Mask64x2) Int64x2
+func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) PopCountMasked(y Mask64x4) Int64x4
+func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) PopCountMasked(y Mask64x8) Int64x8
+func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) PopCountMasked(y Mask8x16) Uint8x16
+func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) PopCountMasked(y Mask8x32) Uint8x32
+func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) PopCountMasked(y Mask8x64) Uint8x64
+func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) PopCountMasked(y Mask16x8) Uint16x8
+func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) PopCountMasked(y Mask16x16) Uint16x16
+func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) PopCountMasked(y Mask16x32) Uint16x32
+func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) PopCountMasked(y Mask32x4) Uint32x4
+func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) PopCountMasked(y Mask32x8) Uint32x8
+func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) PopCountMasked(y Mask32x16) Uint32x16
+func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) PopCountMasked(y Mask64x2) Uint64x2
+func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) PopCountMasked(y Mask64x4) Uint64x4
+func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4
// PopCountMasked counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8
+func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8
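// Example (illustrative sketch, not part of the API diff): per-lane
// population count under a mask. Assumes GOEXPERIMENT=simd and
// `import "simd"`; bitCounts is an invented name.
//
//	// bitCounts returns the number of set bits in each selected lane.
//	func bitCounts(x simd.Uint64x4, mask simd.Mask64x4) simd.Uint64x4 {
//		return x.PopCountMasked(mask)
//	}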
/* RotateAllLeft */
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Int32x4
+func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Int32x8
+func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Int32x16
+func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Int64x2
+func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Int64x4
+func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Int64x8
+func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Uint32x4
+func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Uint32x8
+func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4
// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8
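// Example (illustrative sketch, not part of the API diff): the rotate count
// is an immediate, so it must be a constant at the call site; a variable
// panics at run time, per the doc comments above. Assumes GOEXPERIMENT=simd
// and `import "simd"`; rol7 is an invented name.
//
//	// rol7 rotates every selected 32-bit lane left by 7 bits.
//	func rol7(x simd.Uint32x8, mask simd.Mask32x8) simd.Uint32x8 {
//		return x.RotateAllLeftMasked(7, mask)
//	}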
/* RotateAllRight */
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Int32x4
+func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Int32x8
+func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Int32x16
+func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Int64x2
+func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Int64x4
+func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Int64x8
+func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Uint32x4
+func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Uint32x8
+func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4
// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8
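// Example (illustrative sketch, not part of the API diff): the right-rotate
// mirror of the sketch above, with the same constant-shift requirement.
// Assumes GOEXPERIMENT=simd and `import "simd"`; ror1 is an invented name.
//
//	// ror1 rotates every selected 64-bit lane right by 1 bit.
//	func ror1(x simd.Uint64x2, mask simd.Mask64x2) simd.Uint64x2 {
//		return x.RotateAllRightMasked(1, mask)
//	}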
/* RotateLeft */
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x4) RotateLeftMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x8) RotateLeftMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x16) RotateLeftMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x2) RotateLeftMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x4) RotateLeftMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x8) RotateLeftMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x4) RotateLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x8) RotateLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x16) RotateLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
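// Example (illustrative sketch, not part of the API diff): unlike
// RotateAllLeftMasked's single immediate, each lane here takes its own count
// from y. Assumes GOEXPERIMENT=simd and `import "simd"`; rolPerLane is an
// invented name.
//
//	// rolPerLane rotates x[i] left by counts[i] bits in selected lanes.
//	func rolPerLane(x, counts simd.Uint32x4, mask simd.Mask32x4) simd.Uint32x4 {
//		return x.RotateLeftMasked(counts, mask)
//	}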
/* RotateRight */
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x4) RotateRightMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x8) RotateRightMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x16) RotateRightMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x2) RotateRightMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x4) RotateRightMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x8) RotateRightMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x4) RotateRightMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x8) RotateRightMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x16) RotateRightMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateRightMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateRightMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateRightMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
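// Example (illustrative sketch, not part of the API diff): the per-lane
// right-rotate counterpart of the previous sketch. Assumes GOEXPERIMENT=simd
// and `import "simd"`; rorPerLane is an invented name.
//
//	// rorPerLane rotates x[i] right by counts[i] bits in selected lanes.
//	func rorPerLane(x, counts simd.Uint64x4, mask simd.Mask64x4) simd.Uint64x4 {
//		return x.RotateRightMasked(counts, mask)
//	}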
/* Round */
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// RoundWithPrecisionMasked rounds elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// RoundWithPrecisionMasked rounds elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// RoundWithPrecisionMasked rounds elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// RoundWithPrecisionMasked rounds elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// RoundWithPrecisionMasked rounds elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) RoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
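// Example (illustrative sketch, not part of the API diff). Going by
// VRNDSCALE's encoding, prec would be the number of binary fraction bits
// preserved (rounding to multiples of 2^-prec); that reading is an
// assumption, not stated by this diff. Assumes GOEXPERIMENT=simd and
// `import "simd"`; roundQuarter is an invented name.
//
//	// roundQuarter rounds selected lanes to multiples of 0.25 (prec=2).
//	func roundQuarter(x simd.Float64x4, mask simd.Mask64x4) simd.Float64x4 {
//		return x.RoundWithPrecisionMasked(2, mask)
//	}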
/* SaturatedAdd */
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedAddMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedAddMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedAddMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedAddMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedAddMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedAddMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedAddMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedAddMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedAddMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedAddMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedAddMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16
// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedAddMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32
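// Example (illustrative sketch, not part of the API diff): saturating lanes
// clamp at the element type's bounds instead of wrapping. Assumes
// GOEXPERIMENT=simd and `import "simd"`; satAdd is an invented name.
//
//	// satAdd: in an int8 lane, 100+100 yields 127 (clamped), not -56.
//	func satAdd(a, b simd.Int8x32, mask simd.Mask8x32) simd.Int8x32 {
//		return a.SaturatedAddMasked(b, mask)
//	}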
/* SaturatedPairDotProdAccumulate */
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
+func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
+func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
+func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
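// Example (illustrative sketch, not part of the API diff): the saturating
// variant of PairDotProdAccumulateMasked shown earlier; per VPDPWSSDS, the
// accumulation clamps at the int32 bounds rather than wrapping. Assumes
// GOEXPERIMENT=simd and `import "simd"`; satDotAccum is an invented name.
//
//	func satDotAccum(acc simd.Int32x4, a, b simd.Int16x8, mask simd.Mask32x4) simd.Int32x4 {
//		return acc.SaturatedPairDotProdAccumulateMasked(a, b, mask)
//	}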
/* SaturatedPairwiseAdd */
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedSubMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedSubMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedSubMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedSubMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedSubMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedSubMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedSubMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedSubMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedSubMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedSubMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedSubMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16
// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedSubMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32
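// Example (illustrative sketch, not part of the API diff): subtraction with
// clamping at the element type's bounds. Assumes GOEXPERIMENT=simd and
// `import "simd"`; satSub is an invented name.
//
//	// satSub: in an int16 lane, -30000 - 10000 clamps to -32768.
//	func satSub(a, b simd.Int16x8, mask simd.Mask16x8) simd.Int16x8 {
//		return a.SaturatedSubMasked(b, mask)
//	}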
/* SaturatedUnsignedSignedPairDotProd */
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, z Mask16x8) Int16x8
+func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8
// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, z Mask16x16) Int16x16
+func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16
// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, z Mask16x32) Int16x32
+func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32
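// Example (illustrative sketch, not part of the API diff): VPMADDUBSW mixes
// signedness; the receiver's bytes are treated as unsigned, the argument's
// as signed, and pair sums saturate to int16. Note from the signature that
// the mask uses the output geometry (Mask16x8). Assumes GOEXPERIMENT=simd
// and `import "simd"`; mixedDot is an invented name.
//
//	func mixedDot(u simd.Uint8x16, s simd.Int8x16, mask simd.Mask16x8) simd.Int16x8 {
//		return u.SaturatedUnsignedSignedPairDotProdMasked(s, mask)
//	}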
/* SaturatedUnsignedSignedQuadDotProdAccumulate */
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
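// Example (illustrative sketch, not part of the API diff): each int32 lane
// accumulates a 4-element uint8-by-int8 dot product with saturation
// (VPDPBUSDS). Assumes GOEXPERIMENT=simd and `import "simd"`; quadDotAccum
// is an invented name.
//
//	// quadDotAccum: acc[i] += u[4i]*s[4i] + ... + u[4i+3]*s[4i+3], saturating.
//	func quadDotAccum(acc simd.Int32x4, u simd.Uint8x16, s simd.Int8x16, mask simd.Mask32x4) simd.Int32x4 {
//		return acc.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(u, s, mask)
//	}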
/* Set128 */
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
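// Example (illustrative sketch with assumed variables): the constant-shift
// funnel concatenates each lane of x with the matching lane of y. For
// 16-bit lanes,
//
//	r := x.ShiftAllLeftAndFillUpperFromMasked(3, y, m) // x, y Int16x8; m Mask16x8
//
// sets each active lane of r to (x[i]<<3) | (y[i]>>13): the top 3 bits of
// y[i] fill the 3 bits vacated at the bottom of x[i].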
/* ShiftAllLeftMasked */
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4
// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8
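// Example (illustrative sketch with assumed variables): unlike the
// immediate forms above, the shift count here is an ordinary uint64 and
// may be computed at run time,
//
//	r := x.ShiftAllLeftMasked(n, m) // x Int32x4, n uint64, m Mask32x4
//
// each active lane of r is x[i] << n with the vacated low bits zeroed.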
/* ShiftAllRight */
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
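// Example (illustrative sketch with assumed variables): the right-shift
// funnel is the mirror image of the left-shift form. For 16-bit lanes,
//
//	r := x.ShiftAllRightAndFillUpperFromMasked(3, y, m) // x, y Uint16x8; m Mask16x8
//
// sets each active lane of r to (x[i]>>3) | (y[i]<<13): the low 3 bits of
// y[i] fill the 3 bits vacated at the top of x[i].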
/* ShiftAllRightMasked */
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4
// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8
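// Example (illustrative sketch with assumed variables): the element type
// selects arithmetic versus logical shifting,
//
//	ri := xi.ShiftAllRightMasked(2, m) // xi Int32x4: vacated bits copy the sign bit (VPSRAD)
//	ru := xu.ShiftAllRightMasked(2, m) // xu Uint32x4: vacated bits are zeroed (VPSRLD)
//
// both apply the same run-time count to every active lane.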
/* ShiftLeft */
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
+func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
+func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
+func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
+func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
+func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
+func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
+func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
+func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
+func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
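// Example (illustrative sketch; "counts" and "fill" are descriptive names
// for the y and z arguments, not names from the API): the per-lane funnel
// shift reads a separate count from each lane of y,
//
//	r := x.ShiftLeftAndFillUpperFromMasked(counts, fill, m) // all Uint32x4; m Mask32x4
//
// each active lane of r is x[i] shifted left by counts[i] (low 5 bits
// only), with the vacated low bits taken from the top bits of fill[i].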
/* ShiftLeftMasked */
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftLeftMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftLeftMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftLeftMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x4) ShiftLeftMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x8) ShiftLeftMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x16) ShiftLeftMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftLeftMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftLeftMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftLeftMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftLeftMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftLeftMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftLeftMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
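// Example (illustrative sketch; "counts" is a descriptive name for the y
// argument): per-lane variable shifts,
//
//	r := x.ShiftLeftMasked(counts, m) // x, counts Uint16x8; m Mask16x8
//
// each active lane of r is x[i] << counts[i] with the emptied low bits
// zeroed.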
/* ShiftRight */
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
+func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
+func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
+func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
+func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
+func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
+func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
+func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
+func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
+func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
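// Example (illustrative sketch; "counts" and "fill" are descriptive names
// for the y and z arguments): the variable right funnel,
//
//	r := x.ShiftRightAndFillUpperFromMasked(counts, fill, m) // all Uint64x2; m Mask64x2
//
// each active lane of r is x[i] shifted right by counts[i], with the
// vacated high bits taken from the low bits of fill[i].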
/* ShiftRightMasked */
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftRightMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftRightMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftRightMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftRightMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftRightMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftRightMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftRightMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
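// Go's scalar shift operators already model the out-of-range count behavior
// of VPSRAV*/VPSRLV*: counts of at least the element width yield the sign
// fill for signed values and zero for unsigned ones. Hypothetical per-lane
// sketches (sraLane and srlLane are illustrative, with zeroing masking
// assumed):
//
//	func sraLane(x, count int32, active bool) int32 {
//		if !active {
//			return 0 // assumed zeroing-mask behavior
//		}
//		// uint32(count) avoids Go's negative-shift panic; counts above 31
//		// then saturate to an all-sign-bit result, matching VPSRAVD.
//		return x >> uint32(count)
//	}
//
//	func srlLane(x, count uint32, active bool) uint32 {
//		if !active {
//			return 0
//		}
//		return x >> count // counts above 31 yield 0, matching VPSRLVD
//	}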
/* Sqrt */
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x4) SqrtMasked(y Mask32x4) Float32x4
+func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x8) SqrtMasked(y Mask32x8) Float32x8
+func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x16) SqrtMasked(y Mask32x16) Float32x16
+func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x2) SqrtMasked(y Mask64x2) Float64x2
+func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x4) SqrtMasked(y Mask64x4) Float64x4
+func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x8) SqrtMasked(y Mask64x8) Float64x8
+func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8
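// Per lane this is IEEE-754 square root gated by the mask. A hypothetical
// scalar sketch (sqrtLane is illustrative, not part of the API; zeroing
// masking assumed):
//
//	import "math"
//
//	func sqrtLane(x float64, active bool) float64 {
//		if !active {
//			return 0 // assumed zeroing-mask behavior
//		}
//		return math.Sqrt(x) // math.Sqrt matches VSQRTPD per element
//	}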
/* Sub */
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x4) SubMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x8) SubMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x16) SubMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x2) SubMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x4) SubMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x8) SubMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x16) SubMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x32) SubMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x64) SubMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x8) SubMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x16) SubMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x32) SubMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x4) SubMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x8) SubMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x16) SubMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x2) SubMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x4) SubMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x8) SubMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x16) SubMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x32) SubMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x64) SubMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x8) SubMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x16) SubMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x32) SubMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x4) SubMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x8) SubMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x16) SubMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x2) SubMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x4) SubMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x8) SubMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8
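// Every masked binary operation above follows the same lane-wise pattern,
// captured by this hypothetical generic model (binOpMasked and element are
// not part of the API; zeroing masking is assumed):
//
//	type element interface {
//		~int8 | ~int16 | ~int32 | ~int64 |
//			~uint8 | ~uint16 | ~uint32 | ~uint64 |
//			~float32 | ~float64
//	}
//
//	func binOpMasked[T element](x, y []T, m []bool, op func(T, T) T) []T {
//		dst := make([]T, len(x))
//		for i := range dst {
//			if m[i] {
//				dst[i] = op(x[i], y[i])
//			} // masked-out lanes stay zero
//		}
//		return dst
//	}
//
// For example, binOpMasked(x, y, m, func(a, b int32) int32 { return a - b })
// models Int32x4.SubMasked over 4-element slices.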
/* Trunc */
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) TruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) TruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) TruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) TruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) TruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) TruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
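// VRNDSCALE's truncating mode rounds toward zero at a granularity of
// 2**-prec. A hypothetical scalar model (truncLane is illustrative; it
// ignores the instruction's NaN and overflow special cases and assumes
// zeroing masking):
//
//	import "math"
//
//	func truncLane(x float64, prec uint8, active bool) float64 {
//		if !active {
//			return 0 // assumed zeroing-mask behavior
//		}
//		scale := math.Ldexp(1, int(prec))  // 2**prec
//		return math.Trunc(x*scale) / scale // keep prec fractional bits
//	}
//
// For example, truncLane(3.14159, 2, true) keeps two fractional bits and
// returns 3.0, the largest multiple of 0.25 not exceeding the input.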
/* UnsignedSignedQuadDotProdAccumulate */
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
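// Each 32-bit lane accumulates four unsigned-byte times signed-byte
// products. A hypothetical model of one lane (dotLane is illustrative;
// VPDPBUSD wraps on overflow rather than saturating, and zeroing masking is
// assumed):
//
//	func dotLane(acc int32, u [4]uint8, s [4]int8, active bool) int32 {
//		if !active {
//			return 0 // assumed zeroing-mask behavior
//		}
//		for j := 0; j < 4; j++ {
//			acc += int32(u[j]) * int32(s[j]) // u8 x s8 product, widened to int32
//		}
//		return acc
//	}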
/* Xor */
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x4) XorMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x8) XorMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x16) XorMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x2) XorMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x4) XorMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x8) XorMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x4) XorMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x8) XorMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x16) XorMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x2) XorMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x4) XorMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x8) XorMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8
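// The binOpMasked sketch above covers these as well, with
// op = func(a, b uint32) uint32 { return a ^ b }. XOR itself is lane-size
// agnostic; VPXORD and VPXORQ differ only in masking granularity, one mask
// bit per 32-bit or 64-bit lane respectively.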
// AsFloat64x2 converts from Float32x4 to Float64x2.
func (from Float32x4) AsFloat64x2() (to Float64x2)
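// Assuming the As* methods reinterpret the vector's 128 bits in place rather
// than converting values lane by lane (an assumption; a value conversion
// would go through a CVT instruction instead), a hypothetical scalar model
// is:
//
//	import "math"
//
//	func asFloat64x2(from [4]float32) (to [2]float64) {
//		for i := range to {
//			lo := uint64(math.Float32bits(from[2*i]))   // even lane -> low 32 bits
//			hi := uint64(math.Float32bits(from[2*i+1])) // odd lane -> high 32 bits
//			to[i] = math.Float64frombits(hi<<32 | lo)   // little-endian lane order
//		}
//		return to
//	}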