package simd
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x16) ApproximateReciprocal() Float32x16
+/* Absolute */
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
-func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16
+// Asm: VPABSB, CPU Feature: AVX
+func (x Int8x16) Absolute() Int8x16
-// Sqrt computes the square root of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Sqrt() Float32x16
+// Asm: VPABSB, CPU Feature: AVX2
+func (x Int8x32) Absolute() Int8x32
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x4) ApproximateReciprocal() Float32x4
+// Asm: VPABSB, CPU Feature: AVX512EVEX
+func (x Int8x64) Absolute() Int8x64
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VRSQRTPS, CPU Feature: AVX
-func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
+// Asm: VPABSW, CPU Feature: AVX
+func (x Int16x8) Absolute() Int16x8
-// Ceil rounds elements up to the nearest integer.
-// Const Immediate = 2.
+// Absolute computes the absolute value of each element.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Ceil() Float32x4
+// Asm: VPABSW, CPU Feature: AVX2
+func (x Int16x16) Absolute() Int16x16
-// Floor rounds elements down to the nearest integer.
-// Const Immediate = 1.
+// Absolute computes the absolute value of each element.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Floor() Float32x4
+// Asm: VPABSW, CPU Feature: AVX512EVEX
+func (x Int16x32) Absolute() Int16x32
-// Round rounds elements to the nearest integer.
-// Const Immediate = 0.
+// Absolute computes the absolute value of each element.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Round() Float32x4
+// Asm: VPABSD, CPU Feature: AVX
+func (x Int32x4) Absolute() Int32x4
-// Sqrt computes the square root of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VSQRTPS, CPU Feature: AVX
-func (x Float32x4) Sqrt() Float32x4
+// Asm: VPABSD, CPU Feature: AVX2
+func (x Int32x8) Absolute() Int32x8
-// Trunc truncates elements towards zero.
-// Const Immediate = 3.
+// Absolute computes the absolute value of each element.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Trunc() Float32x4
+// Asm: VPABSD, CPU Feature: AVX512EVEX
+func (x Int32x16) Absolute() Int32x16
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x8) ApproximateReciprocal() Float32x8
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) Absolute() Int64x2
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Absolute computes the absolute value of each element.
//
-// Asm: VRSQRTPS, CPU Feature: AVX
-func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) Absolute() Int64x4
-// Ceil rounds elements up to the nearest integer.
-// Const Immediate = 2.
+// Absolute computes the absolute value of each element.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Ceil() Float32x8
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Absolute() Int64x8
-// Floor rounds elements down to the nearest integer.
-// Const Immediate = 1.
-//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Floor() Float32x8
+/* Add */
-// Round rounds elements to the nearest integer.
-// Const Immediate = 0.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Round() Float32x8
+// Asm: VADDPS, CPU Feature: AVX
+func (x Float32x4) Add(y Float32x4) Float32x4
-// Sqrt computes the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VSQRTPS, CPU Feature: AVX
-func (x Float32x8) Sqrt() Float32x8
+// Asm: VADDPS, CPU Feature: AVX
+func (x Float32x8) Add(y Float32x8) Float32x8
-// Trunc truncates elements towards zero.
-// Const Immediate = 3.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Trunc() Float32x8
+// Asm: VADDPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Add(y Float32x16) Float32x16
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x2) ApproximateReciprocal() Float64x2
+// Asm: VADDPD, CPU Feature: AVX
+func (x Float64x2) Add(y Float64x2) Float64x2
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
+// Asm: VADDPD, CPU Feature: AVX
+func (x Float64x4) Add(y Float64x4) Float64x4
-// Ceil rounds elements up to the nearest integer.
-// Const Immediate = 2.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Ceil() Float64x2
+// Asm: VADDPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Add(y Float64x8) Float64x8
-// Floor rounds elements down to the nearest integer.
-// Const Immediate = 1.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Floor() Float64x2
+// Asm: VPADDB, CPU Feature: AVX
+func (x Int8x16) Add(y Int8x16) Int8x16
-// Round rounds elements to the nearest integer.
-// Const Immediate = 0.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Round() Float64x2
+// Asm: VPADDB, CPU Feature: AVX2
+func (x Int8x32) Add(y Int8x32) Int8x32
-// Sqrt computes the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VSQRTPD, CPU Feature: AVX
-func (x Float64x2) Sqrt() Float64x2
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Int8x64) Add(y Int8x64) Int8x64
-// Trunc truncates elements towards zero.
-// Const Immediate = 3.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Trunc() Float64x2
+// Asm: VPADDW, CPU Feature: AVX
+func (x Int16x8) Add(y Int16x8) Int16x8
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x4) ApproximateReciprocal() Float64x4
+// Asm: VPADDW, CPU Feature: AVX2
+func (x Int16x16) Add(y Int16x16) Int16x16
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Int16x32) Add(y Int16x32) Int16x32
-// Ceil rounds elements up to the nearest integer.
-// Const Immediate = 2.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Ceil() Float64x4
+// Asm: VPADDD, CPU Feature: AVX
+func (x Int32x4) Add(y Int32x4) Int32x4
-// Floor rounds elements down to the nearest integer.
-// Const Immediate = 1.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Floor() Float64x4
+// Asm: VPADDD, CPU Feature: AVX2
+func (x Int32x8) Add(y Int32x8) Int32x8
-// Round rounds elements to the nearest integer.
-// Const Immediate = 0.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Round() Float64x4
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Int32x16) Add(y Int32x16) Int32x16
-// Sqrt computes the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VSQRTPD, CPU Feature: AVX
-func (x Float64x4) Sqrt() Float64x4
+// Asm: VPADDQ, CPU Feature: AVX
+func (x Int64x2) Add(y Int64x2) Int64x2
-// Trunc truncates elements towards zero.
-// Const Immediate = 3.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Trunc() Float64x4
+// Asm: VPADDQ, CPU Feature: AVX2
+func (x Int64x4) Add(y Int64x4) Int64x4
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x8) ApproximateReciprocal() Float64x8
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Add(y Int64x8) Int64x8
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8
+// Asm: VPADDB, CPU Feature: AVX
+func (x Uint8x16) Add(y Uint8x16) Uint8x16
-// Sqrt computes the square root of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Sqrt() Float64x8
+// Asm: VPADDB, CPU Feature: AVX2
+func (x Uint8x32) Add(y Uint8x32) Uint8x32
-// Absolute computes the absolute value of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPABSW, CPU Feature: AVX2
-func (x Int16x16) Absolute() Int16x16
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Add(y Uint8x64) Uint8x64
-// PopCount counts the number of set bits in each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x16) PopCount() Int16x16
+// Asm: VPADDW, CPU Feature: AVX
+func (x Uint16x8) Add(y Uint16x8) Uint16x8
-// Absolute computes the absolute value of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPABSW, CPU Feature: AVX512EVEX
-func (x Int16x32) Absolute() Int16x32
+// Asm: VPADDW, CPU Feature: AVX2
+func (x Uint16x16) Add(y Uint16x16) Uint16x16
-// PopCount counts the number of set bits in each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x32) PopCount() Int16x32
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Add(y Uint16x32) Uint16x32
-// Absolute computes the absolute value of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPABSW, CPU Feature: AVX
-func (x Int16x8) Absolute() Int16x8
+// Asm: VPADDD, CPU Feature: AVX
+func (x Uint32x4) Add(y Uint32x4) Uint32x4
-// PopCount counts the number of set bits in each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x8) PopCount() Int16x8
+// Asm: VPADDD, CPU Feature: AVX2
+func (x Uint32x8) Add(y Uint32x8) Uint32x8
-// Absolute computes the absolute value of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPABSD, CPU Feature: AVX512EVEX
-func (x Int32x16) Absolute() Int32x16
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Add(y Uint32x16) Uint32x16
-// PopCount counts the number of set bits in each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x16) PopCount() Int32x16
+// Asm: VPADDQ, CPU Feature: AVX
+func (x Uint64x2) Add(y Uint64x2) Uint64x2
-// Absolute computes the absolute value of each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPABSD, CPU Feature: AVX
-func (x Int32x4) Absolute() Int32x4
+// Asm: VPADDQ, CPU Feature: AVX2
+func (x Uint64x4) Add(y Uint64x4) Uint64x4
-// PopCount counts the number of set bits in each element.
+// Add adds corresponding elements of two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x4) PopCount() Int32x4
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Add(y Uint64x8) Uint64x8
-// Absolute computes the absolute value of each element.
-//
-// Asm: VPABSD, CPU Feature: AVX2
-func (x Int32x8) Absolute() Int32x8
+/* AddSub */
-// PopCount counts the number of set bits in each element.
+// AddSub subtracts even elements and adds odd elements of two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x8) PopCount() Int32x8
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x4) AddSub(y Float32x4) Float32x4
-// Absolute computes the absolute value of each element.
+// AddSub subtracts even elements and adds odd elements of two vectors.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Absolute() Int64x2
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x8) AddSub(y Float32x8) Float32x8
-// PopCount counts the number of set bits in each element.
+// AddSub subtracts even elements and adds odd elements of two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x2) PopCount() Int64x2
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x2) AddSub(y Float64x2) Float64x2
-// Absolute computes the absolute value of each element.
+// AddSub subtracts even elements and adds odd elements of two vectors.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) Absolute() Int64x4
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x4) AddSub(y Float64x4) Float64x4
-// PopCount counts the number of set bits in each element.
+/* And */
+
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x4) PopCount() Int64x4
+// Asm: VANDPS, CPU Feature: AVX
+func (x Float32x4) And(y Float32x4) Float32x4
-// Absolute computes the absolute value of each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Absolute() Int64x8
+// Asm: VANDPS, CPU Feature: AVX
+func (x Float32x8) And(y Float32x8) Float32x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x8) PopCount() Int64x8
+// Asm: VANDPS, CPU Feature: AVX512EVEX
+func (x Float32x16) And(y Float32x16) Float32x16
-// Absolute computes the absolute value of each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPABSB, CPU Feature: AVX
-func (x Int8x16) Absolute() Int8x16
+// Asm: VANDPD, CPU Feature: AVX
+func (x Float64x2) And(y Float64x2) Float64x2
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x16) PopCount() Int8x16
+// Asm: VANDPD, CPU Feature: AVX
+func (x Float64x4) And(y Float64x4) Float64x4
-// Absolute computes the absolute value of each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPABSB, CPU Feature: AVX2
-func (x Int8x32) Absolute() Int8x32
+// Asm: VANDPD, CPU Feature: AVX512EVEX
+func (x Float64x8) And(y Float64x8) Float64x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x32) PopCount() Int8x32
+// Asm: VPAND, CPU Feature: AVX
+func (x Int8x16) And(y Int8x16) Int8x16
-// Absolute computes the absolute value of each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPABSB, CPU Feature: AVX512EVEX
-func (x Int8x64) Absolute() Int8x64
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int8x32) And(y Int8x32) Int8x32
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x64) PopCount() Int8x64
+// Asm: VPAND, CPU Feature: AVX
+func (x Int16x8) And(y Int16x8) Int16x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x16) PopCount() Uint16x16
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int16x16) And(y Int16x16) Int16x16
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x32) PopCount() Uint16x32
+// Asm: VPAND, CPU Feature: AVX
+func (x Int32x4) And(y Int32x4) Int32x4
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x8) PopCount() Uint16x8
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int32x8) And(y Int32x8) Int32x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x16) PopCount() Uint32x16
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Int32x16) And(y Int32x16) Int32x16
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x4) PopCount() Uint32x4
+// Asm: VPAND, CPU Feature: AVX
+func (x Int64x2) And(y Int64x2) Int64x2
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x8) PopCount() Uint32x8
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int64x4) And(y Int64x4) Int64x4
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) PopCount() Uint64x2
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) And(y Int64x8) Int64x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) PopCount() Uint64x4
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint8x16) And(y Uint8x16) Uint8x16
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) PopCount() Uint64x8
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint8x32) And(y Uint8x32) Uint8x32
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x16) PopCount() Uint8x16
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint16x8) And(y Uint16x8) Uint16x8
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x32) PopCount() Uint8x32
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint16x16) And(y Uint16x16) Uint16x16
-// PopCount counts the number of set bits in each element.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x64) PopCount() Uint8x64
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint32x4) And(y Uint32x4) Uint32x4
-// Add adds corresponding elements of two vectors.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Add(y Float32x16) Float32x16
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint32x8) And(y Uint32x8) Uint32x8
-// And performs a masked bitwise AND operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VANDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) And(y Float32x16) Float32x16
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) And(y Uint32x16) Uint32x16
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VANDNPS, CPU Feature: AVX512EVEX
-func (x Float32x16) AndNot(y Float32x16) Float32x16
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint64x2) And(y Uint64x2) Uint64x2
-// Div divides elements of two vectors.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Div(y Float32x16) Float32x16
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint64x4) And(y Uint64x4) Uint64x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// And performs a bitwise AND operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Equal(y Float32x16) Mask32x16
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) And(y Uint64x8) Uint64x8
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Greater(y Float32x16) Mask32x16
+/* AndNot */
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
+// Asm: VANDNPS, CPU Feature: AVX
+func (x Float32x4) AndNot(y Float32x4) Float32x4
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
+// Asm: VANDNPS, CPU Feature: AVX
+func (x Float32x8) AndNot(y Float32x8) Float32x8
-// Less compares for less than.
-// Const Immediate = 1.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Less(y Float32x16) Mask32x16
+// Asm: VANDNPS, CPU Feature: AVX512EVEX
+func (x Float32x16) AndNot(y Float32x16) Float32x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) LessEqual(y Float32x16) Mask32x16
+// Asm: VANDNPD, CPU Feature: AVX
+func (x Float64x2) AndNot(y Float64x2) Float64x2
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedApproximateReciprocal(y Mask32x16) Float32x16
+// Asm: VANDNPD, CPU Feature: AVX
+func (x Float64x4) AndNot(y Float64x4) Float64x4
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedApproximateReciprocalOfSqrt(y Mask32x16) Float32x16
+// Asm: VANDNPD, CPU Feature: AVX512EVEX
+func (x Float64x8) AndNot(y Float64x8) Float64x8
-// Sqrt computes the square root of each element.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedSqrt(y Mask32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int8x16) AndNot(y Int8x16) Int8x16
-// Max computes the maximum of corresponding elements.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Max(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int8x32) AndNot(y Int8x32) Int8x32
-// Min computes the minimum of corresponding elements.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VMINPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Min(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int16x8) AndNot(y Int16x8) Int16x8
-// Mul multiplies corresponding elements of two vectors, masked.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VMULPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Mul(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int16x16) AndNot(y Int16x16) Int16x16
-// MulByPowOf2 multiplies elements by a power of 2.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int32x4) AndNot(y Int32x4) Int32x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) NotEqual(y Float32x16) Mask32x16
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int32x8) AndNot(y Int32x8) Int32x8
-// Or performs a masked bitwise OR operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VORPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Or(y Float32x16) Float32x16
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Int32x16) AndNot(y Int32x16) Int32x16
-// Sub subtracts corresponding elements of two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Sub(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int64x2) AndNot(y Int64x2) Int64x2
-// Xor performs a masked bitwise XOR operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VXORPS, CPU Feature: AVX512EVEX
-func (x Float32x16) Xor(y Float32x16) Float32x16
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int64x4) AndNot(y Int64x4) Int64x4
-// Add adds corresponding elements of two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VADDPS, CPU Feature: AVX
-func (x Float32x4) Add(y Float32x4) Float32x4
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Int64x8) AndNot(y Int64x8) Int64x8
-// AddSub subtracts even elements and adds odd elements of two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VADDSUBPS, CPU Feature: AVX
-func (x Float32x4) AddSub(y Float32x4) Float32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
-// And performs a bitwise AND operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VANDPS, CPU Feature: AVX
-func (x Float32x4) And(y Float32x4) Float32x4
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VANDNPS, CPU Feature: AVX
-func (x Float32x4) AndNot(y Float32x4) Float32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
-// Div divides elements of two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VDIVPS, CPU Feature: AVX
-func (x Float32x4) Div(y Float32x4) Float32x4
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
-// Equal compares for equality.
-// Const Immediate = 0.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) Equal(y Float32x4) Mask32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) Greater(y Float32x4) Mask32x4
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) Less(y Float32x4) Mask32x4
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// AndNot performs a bitwise AND NOT operation between two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) LessEqual(y Float32x4) Mask32x4
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
+
+/* ApproximateReciprocal */
// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedApproximateReciprocal(y Mask32x4) Float32x4
+func (x Float32x4) ApproximateReciprocal() Float32x4
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedApproximateReciprocalOfSqrt(y Mask32x4) Float32x4
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x8) ApproximateReciprocal() Float32x8
-// Sqrt computes the square root of each element.
+// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedSqrt(y Mask32x4) Float32x4
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) ApproximateReciprocal() Float32x16
-// Max computes the maximum of corresponding elements.
+// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x4) Max(y Float32x4) Float32x4
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) ApproximateReciprocal() Float64x2
-// Min computes the minimum of corresponding elements.
+// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x4) Min(y Float32x4) Float32x4
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x4) ApproximateReciprocal() Float64x4
-// Mul multiplies corresponding elements of two vectors.
+// ApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x4) Mul(y Float32x4) Float32x4
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x8) ApproximateReciprocal() Float64x8
-// MulByPowOf2 multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4
+/* ApproximateReciprocalOfSqrt */
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) NotEqual(y Float32x4) Mask32x4
+// Asm: VRSQRTPS, CPU Feature: AVX
+func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
-// Or performs a bitwise OR operation between two vectors.
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VORPS, CPU Feature: AVX
-func (x Float32x4) Or(y Float32x4) Float32x4
+// Asm: VRSQRTPS, CPU Feature: AVX
+func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4
+// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VHSUBPS, CPU Feature: AVX
-func (x Float32x4) PairwiseSub(y Float32x4) Float32x4
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
-// Sub subtracts corresponding elements of two vectors.
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VADDPS, CPU Feature: AVX
-func (x Float32x4) Sub(y Float32x4) Float32x4
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
-// Xor performs a bitwise XOR operation between two vectors.
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VXORPS, CPU Feature: AVX
-func (x Float32x4) Xor(y Float32x4) Float32x4
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VADDPS, CPU Feature: AVX
-func (x Float32x8) Add(y Float32x8) Float32x8
+/* Average */
-// AddSub subtracts even elements and adds odd elements of two vectors.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VADDSUBPS, CPU Feature: AVX
-func (x Float32x8) AddSub(y Float32x8) Float32x8
+// Asm: VPAVGB, CPU Feature: AVX
+func (x Uint8x16) Average(y Uint8x16) Uint8x16
-// And performs a bitwise AND operation between two vectors.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VANDPS, CPU Feature: AVX
-func (x Float32x8) And(y Float32x8) Float32x8
+// Asm: VPAVGB, CPU Feature: AVX2
+func (x Uint8x32) Average(y Uint8x32) Uint8x32
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VANDNPS, CPU Feature: AVX
-func (x Float32x8) AndNot(y Float32x8) Float32x8
+// Asm: VPAVGB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Average(y Uint8x64) Uint8x64
-// Div divides elements of two vectors.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VDIVPS, CPU Feature: AVX
-func (x Float32x8) Div(y Float32x8) Float32x8
+// Asm: VPAVGW, CPU Feature: AVX
+func (x Uint16x8) Average(y Uint16x8) Uint16x8
-// Equal compares for equality.
-// Const Immediate = 0.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) Equal(y Float32x8) Mask32x8
+// Asm: VPAVGW, CPU Feature: AVX2
+func (x Uint16x16) Average(y Uint16x16) Uint16x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Average computes the rounded average of corresponding elements.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) Greater(y Float32x8) Mask32x8
+// Asm: VPAVGW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Average(y Uint16x32) Uint16x32
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
+/* Ceil */
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// Ceil rounds elements up to the nearest integer.
+// Const Immediate = 2.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Ceil() Float32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// Ceil rounds elements up to the nearest integer.
+// Const Immediate = 2.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) Less(y Float32x8) Mask32x8
-
-// LessEqual compares for less than or equal.
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Ceil() Float32x8
+
+// Ceil rounds elements up to the nearest integer.
// Const Immediate = 2.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) LessEqual(y Float32x8) Mask32x8
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Ceil() Float64x2
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// Ceil rounds elements up to the nearest integer.
+// Const Immediate = 2.
//
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedApproximateReciprocal(y Mask32x8) Float32x8
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Ceil() Float64x4
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedApproximateReciprocalOfSqrt(y Mask32x8) Float32x8
+/* CeilSuppressExceptionWithPrecision */
-// Sqrt computes the square root of each element.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedSqrt(y Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// Max computes the maximum of corresponding elements.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x8) Max(y Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// Min computes the minimum of corresponding elements.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x8) Min(y Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// Mul multiplies corresponding elements of two vectors.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x8) Mul(y Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// MulByPowOf2 multiplies elements by a power of 2.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) NotEqual(y Float32x8) Mask32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VORPS, CPU Feature: AVX
-func (x Float32x8) Or(y Float32x8) Float32x8
+/* CeilWithPrecision */
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VHSUBPS, CPU Feature: AVX
-func (x Float32x8) PairwiseSub(y Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8
-// Sub subtracts corresponding elements of two vectors.
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VADDPS, CPU Feature: AVX
-func (x Float32x8) Sub(y Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16
-// Xor performs a bitwise XOR operation between two vectors.
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VXORPS, CPU Feature: AVX
-func (x Float32x8) Xor(y Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2
-// Add adds corresponding elements of two vectors.
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VADDPD, CPU Feature: AVX
-func (x Float64x2) Add(y Float64x2) Float64x2
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4
-// AddSub subtracts even elements and adds odd elements of two vectors.
+// CeilWithPrecision rounds elements up with specified precision.
+// Const Immediate = 2.
//
-// Asm: VADDSUBPD, CPU Feature: AVX
-func (x Float64x2) AddSub(y Float64x2) Float64x2
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
-// And performs a bitwise AND operation between two vectors.
-//
-// Asm: VANDPD, CPU Feature: AVX
-func (x Float64x2) And(y Float64x2) Float64x2
+/* DiffWithCeilSuppressExceptionWithPrecision */
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VANDNPD, CPU Feature: AVX
-func (x Float64x2) AndNot(y Float64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// Div divides elements of two vectors.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VDIVPD, CPU Feature: AVX
-func (x Float64x2) Div(y Float64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// DotProdBroadcast multiplies all elements and broadcasts the sum.
-// Const Immediate = 127.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VDPPD, CPU Feature: AVX
-func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// Equal compares for equality.
-// Const Immediate = 0.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) Equal(y Float64x2) Mask64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) Greater(y Float64x2) Mask64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
+/* DiffWithCeilWithPrecision */
-// Less compares for less than.
-// Const Immediate = 1.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) Less(y Float64x2) Mask64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4
-// LessEqual compares for less than or equal.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
// Const Immediate = 2.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) LessEqual(y Float64x2) Mask64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedApproximateReciprocal(y Mask64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedApproximateReciprocalOfSqrt(y Mask64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2
-// Sqrt computes the square root of each element.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedSqrt(y Mask64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4
-// Max computes the maximum of corresponding elements.
+// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x2) Max(y Float64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x2) Min(y Float64x2) Float64x2
+/* DiffWithFloorSuppressExceptionWithPrecision */
-// Mul multiplies corresponding elements of two vectors.
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x2) Mul(y Float64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// MulByPowOf2 multiplies elements by a power of 2.
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) NotEqual(y Float64x2) Mask64x2
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// Or performs a bitwise OR operation between two vectors.
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VORPD, CPU Feature: AVX
-func (x Float64x2) Or(y Float64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VHSUBPD, CPU Feature: AVX
-func (x Float64x2) PairwiseSub(y Float64x2) Float64x2
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VADDPD, CPU Feature: AVX
-func (x Float64x2) Sub(y Float64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// Xor performs a bitwise XOR operation between two vectors.
-//
-// Asm: VXORPD, CPU Feature: AVX
-func (x Float64x2) Xor(y Float64x2) Float64x2
+/* DiffWithFloorWithPrecision */
-// Add adds corresponding elements of two vectors.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VADDPD, CPU Feature: AVX
-func (x Float64x4) Add(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4
-// AddSub subtracts even elements and adds odd elements of two vectors.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VADDSUBPD, CPU Feature: AVX
-func (x Float64x4) AddSub(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8
-// And performs a bitwise AND operation between two vectors.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VANDPD, CPU Feature: AVX
-func (x Float64x4) And(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VANDNPD, CPU Feature: AVX
-func (x Float64x4) AndNot(y Float64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2
-// Div divides elements of two vectors.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VDIVPD, CPU Feature: AVX
-func (x Float64x4) Div(y Float64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4
-// Equal compares for equality.
-// Const Immediate = 0.
+// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) Equal(y Float64x4) Mask64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) Greater(y Float64x4) Mask64x4
+/* DiffWithRoundSuppressExceptionWithPrecision */
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// Less compares for less than.
-// Const Immediate = 1.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) Less(y Float64x4) Mask64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) LessEqual(y Float64x4) Mask64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// ApproximateReciprocal computes an approximate reciprocal of each element.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedApproximateReciprocal(y Mask64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedApproximateReciprocalOfSqrt(y Mask64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// Sqrt computes the square root of each element.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedSqrt(y Mask64x4) Float64x4
+/* DiffWithRoundWithPrecision */
-// Max computes the maximum of corresponding elements.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x4) Max(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4
-// Min computes the minimum of corresponding elements.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x4) Min(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8
-// Mul multiplies corresponding elements of two vectors.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x4) Mul(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16
-// MulByPowOf2 multiplies elements by a power of 2.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) NotEqual(y Float64x4) Mask64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4
-// Or performs a bitwise OR operation between two vectors.
+// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VORPD, CPU Feature: AVX
-func (x Float64x4) Or(y Float64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4
+/* DiffWithTruncSuppressExceptionWithPrecision */
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VHSUBPD, CPU Feature: AVX
-func (x Float64x4) PairwiseSub(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// Sub subtracts corresponding elements of two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VADDPD, CPU Feature: AVX
-func (x Float64x4) Sub(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// Xor performs a bitwise XOR operation between two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VXORPD, CPU Feature: AVX
-func (x Float64x4) Xor(y Float64x4) Float64x4
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// Add adds corresponding elements of two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Add(y Float64x8) Float64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// And performs a masked bitwise AND operation between two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VANDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) And(y Float64x8) Float64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VANDNPD, CPU Feature: AVX512EVEX
-func (x Float64x8) AndNot(y Float64x8) Float64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// Div divides elements of two vectors.
-//
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Div(y Float64x8) Float64x8
+/* DiffWithTruncWithPrecision */
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Equal(y Float64x8) Mask64x8
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Greater(y Float64x8) Mask64x8
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Less(y Float64x8) Mask64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) LessEqual(y Float64x8) Mask64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedApproximateReciprocal(y Mask64x8) Float64x8
+/* Div */
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// Div divides elements of two vectors.
//
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedApproximateReciprocalOfSqrt(y Mask64x8) Float64x8
-
-// Sqrt computes the square root of each element.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedSqrt(y Mask64x8) Float64x8
+// Asm: VDIVPS, CPU Feature: AVX
+func (x Float32x4) Div(y Float32x4) Float32x4
-// Max computes the maximum of corresponding elements.
+// Div divides elements of two vectors.
//
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Max(y Float64x8) Float64x8
+// Asm: VDIVPS, CPU Feature: AVX
+func (x Float32x8) Div(y Float32x8) Float32x8
-// Min computes the minimum of corresponding elements.
+// Div divides elements of two vectors.
//
-// Asm: VMINPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Min(y Float64x8) Float64x8
+// Asm: VDIVPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Div(y Float32x16) Float32x16
-// Mul multiplies corresponding elements of two vectors, masked.
+// Div divides elements of two vectors.
//
-// Asm: VMULPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Mul(y Float64x8) Float64x8
+// Asm: VDIVPD, CPU Feature: AVX
+func (x Float64x2) Div(y Float64x2) Float64x2
-// MulByPowOf2 multiplies elements by a power of 2.
+// Div divides elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
+// Asm: VDIVPD, CPU Feature: AVX
+func (x Float64x4) Div(y Float64x4) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Div divides elements of two vectors.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) NotEqual(y Float64x8) Mask64x8
+// Asm: VDIVPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Div(y Float64x8) Float64x8
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Or(y Float64x8) Float64x8
+/* DotProdBroadcast */
-// Sub subtracts corresponding elements of two vectors.
+// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// Const Immediate = 127.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Sub(y Float64x8) Float64x8
+// Asm: VDPPD, CPU Feature: AVX
+func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) Xor(y Float64x8) Float64x8
+/* Equal */
-// Add adds corresponding elements of two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPADDW, CPU Feature: AVX2
-func (x Int16x16) Add(y Int16x16) Int16x16
+// Asm: VPCMPEQB, CPU Feature: AVX
+func (x Int8x16) Equal(y Int8x16) Mask8x16
-// And performs a bitwise AND operation between two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Int16x16) And(y Int16x16) Int16x16
+// Asm: VPCMPEQB, CPU Feature: AVX2
+func (x Int8x32) Equal(y Int8x32) Mask8x32
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int16x16) AndNot(y Int16x16) Int16x16
+// Asm: VPCMPEQW, CPU Feature: AVX
+func (x Int16x8) Equal(y Int16x8) Mask16x8
// Equal compares for equality.
// Const Immediate = 0.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Int16x16) Equal(y Int16x16) Mask16x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPGTW, CPU Feature: AVX2
-func (x Int16x16) Greater(y Int16x16) Mask16x16
+// Asm: VPCMPEQD, CPU Feature: AVX
+func (x Int32x4) Equal(y Int32x4) Mask32x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
+// Asm: VPCMPEQD, CPU Feature: AVX2
+func (x Int32x8) Equal(y Int32x8) Mask32x8
-// Less compares for less than.
-// Const Immediate = 1.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) Less(y Int16x16) Mask16x16
+// Asm: VPCMPEQQ, CPU Feature: AVX
+func (x Int64x2) Equal(y Int64x2) Mask64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) LessEqual(y Int16x16) Mask16x16
+// Asm: VPCMPEQQ, CPU Feature: AVX2
+func (x Int64x4) Equal(y Int64x4) Mask64x4
-// Absolute computes the absolute value of each element.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPABSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedAbsolute(y Mask16x16) Int16x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) Equal(y Float32x4) Mask32x4
-// PopCount counts the number of set bits in each element.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedPopCount(y Mask16x16) Int16x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) Equal(y Float32x8) Mask32x8
-// Max computes the maximum of corresponding elements.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPMAXSW, CPU Feature: AVX2
-func (x Int16x16) Max(y Int16x16) Int16x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Equal(y Float32x16) Mask32x16
-// Min computes the minimum of corresponding elements.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPMINSW, CPU Feature: AVX2
-func (x Int16x16) Min(y Int16x16) Int16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) Equal(y Float64x2) Mask64x2
-// MulHigh multiplies elements and stores the high part of the result.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPMULHW, CPU Feature: AVX2
-func (x Int16x16) MulHigh(y Int16x16) Int16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) Equal(y Float64x4) Mask64x4
-// MulLow multiplies elements and stores the low part of the result.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Int16x16) MulLow(y Int16x16) Int16x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Equal(y Float64x8) Mask64x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) NotEqual(y Int16x16) Mask16x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) Equal(y Int8x64) Mask8x64
-// Or performs a bitwise OR operation between two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int16x16) Or(y Int16x16) Int16x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) Equal(y Int16x32) Mask16x32
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPMADDWD, CPU Feature: AVX2
-func (x Int16x16) PairDotProd(y Int16x16) Int32x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) Equal(y Int32x16) Mask32x16
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Equal(y Int64x8) Mask64x8
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Int16x16) PairwiseSub(y Int16x16) Int16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) Equal(y Uint8x16) Mask8x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) Equal(y Uint8x32) Mask8x32
-// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPHADDSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Equal(y Uint8x64) Mask8x64
-// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPHSUBSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) Equal(y Uint16x8) Mask16x8
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPSUBSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedSub(y Int16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) Equal(y Uint16x16) Mask16x16
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPSIGNW, CPU Feature: AVX2
-func (x Int16x16) Sign(y Int16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Equal(y Uint16x32) Mask16x32
-// Sub subtracts corresponding elements of two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPSUBW, CPU Feature: AVX2
-func (x Int16x16) Sub(y Int16x16) Int16x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) Equal(y Uint32x4) Mask32x4
-// Xor performs a bitwise XOR operation between two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Int16x16) Xor(y Int16x16) Int16x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) Equal(y Uint32x8) Mask32x8
-// Add adds corresponding elements of two vectors.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x32) Add(y Int16x32) Int16x32
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Equal(y Uint32x16) Mask32x16
// Equal compares for equality.
// Const Immediate = 0.
//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x32) Equal(y Int16x32) Mask16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Equal(y Uint64x2) Mask64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x32) Greater(y Int16x32) Mask16x32
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Equal(y Uint64x4) Mask64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// Equal compares for equality.
+// Const Immediate = 0.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) Less(y Int16x32) Mask16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Equal(y Uint64x8) Mask64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) LessEqual(y Int16x32) Mask16x32
+/* Floor */
-// Absolute computes the absolute value of each element.
+// Floor rounds elements down to the nearest integer.
+// Const Immediate = 1.
//
-// Asm: VPABSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedAbsolute(y Mask16x32) Int16x32
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Floor() Float32x4
-// PopCount counts the number of set bits in each element.
+// Floor rounds elements down to the nearest integer.
+// Const Immediate = 1.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedPopCount(y Mask16x32) Int16x32
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Floor() Float32x8
-// Max computes the maximum of corresponding elements.
+// Floor rounds elements down to the nearest integer.
+// Const Immediate = 1.
//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x32) Max(y Int16x32) Int16x32
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Floor() Float64x2
-// Min computes the minimum of corresponding elements.
+// Floor rounds elements down to the nearest integer.
+// Const Immediate = 1.
//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x32) Min(y Int16x32) Int16x32
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Floor() Float64x4
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x32) MulHigh(y Int16x32) Int16x32
+/* FloorSuppressExceptionWithPrecision */
-// MulLow multiplies elements and stores the low part of the result, masked.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x32) MulLow(y Int16x32) Int16x32
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x32) PairDotProd(y Int16x32) Int32x16
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x32) SaturatedSub(y Int16x32) Int16x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x32) Sub(y Int16x32) Int16x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX
-func (x Int16x8) Add(y Int16x8) Int16x8
+/* FloorWithPrecision */
-// And performs a bitwise AND operation between two vectors.
+// FloorWithPrecision rounds elements down with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Int16x8) And(y Int16x8) Int16x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// FloorWithPrecision rounds elements down with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int16x8) AndNot(y Int16x8) Int16x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8
-// Equal compares for equality.
-// Const Immediate = 0.
+// FloorWithPrecision rounds elements down with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPCMPEQW, CPU Feature: AVX
-func (x Int16x8) Equal(y Int16x8) Mask16x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FloorWithPrecision rounds elements down with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPCMPGTW, CPU Feature: AVX
-func (x Int16x8) Greater(y Int16x8) Mask16x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// FloorWithPrecision rounds elements down with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4
-// Less compares for less than.
+// FloorWithPrecision rounds elements down with specified precision.
// Const Immediate = 1.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) Less(y Int16x8) Mask16x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) LessEqual(y Int16x8) Mask16x8
+/* FusedMultiplyAdd132 */
-// Absolute computes the absolute value of each element.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPABSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedAbsolute(y Mask16x8) Int16x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAdd132(y Float32x4, z Float32x4) Float32x4
-// PopCount counts the number of set bits in each element.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedPopCount(y Mask16x8) Int16x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAdd132(y Float32x8, z Float32x8) Float32x8
-// Max computes the maximum of corresponding elements.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPMAXSW, CPU Feature: AVX
-func (x Int16x8) Max(y Int16x8) Int16x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAdd132(y Float32x16, z Float32x16) Float32x16
-// Min computes the minimum of corresponding elements.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPMINSW, CPU Feature: AVX
-func (x Int16x8) Min(y Int16x8) Int16x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
-// MulHigh multiplies elements and stores the high part of the result.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPMULHW, CPU Feature: AVX
-func (x Int16x8) MulHigh(y Int16x8) Int16x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
-// MulLow multiplies elements and stores the low part of the result.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Int16x8) MulLow(y Int16x8) Int16x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) NotEqual(y Int16x8) Mask16x8
+/* FusedMultiplyAdd213 */
-// Or performs a bitwise OR operation between two vectors.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int16x8) Or(y Int16x8) Int16x8
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAdd213(y Float32x4, z Float32x4) Float32x4
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) PairDotProd(y Int16x8) Int32x4
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAdd213(y Float32x8, z Float32x8) Float32x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAdd213(y Float32x16, z Float32x16) Float32x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Int16x8) PairwiseSub(y Int16x8) Int16x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
-// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VPHADDSW, CPU Feature: AVX
-func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
-// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
-//
-// Asm: VPHSUBSW, CPU Feature: AVX
-func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8
+/* FusedMultiplyAdd231 */
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPSUBSW, CPU Feature: AVX
-func (x Int16x8) SaturatedSub(y Int16x8) Int16x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAdd231(y Float32x4, z Float32x4) Float32x4
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPSIGNW, CPU Feature: AVX
-func (x Int16x8) Sign(y Int16x8) Int16x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAdd231(y Float32x8, z Float32x8) Float32x8
-// Sub subtracts corresponding elements of two vectors.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPSUBW, CPU Feature: AVX
-func (x Int16x8) Sub(y Int16x8) Int16x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAdd231(y Float32x16, z Float32x16) Float32x16
-// Xor performs a bitwise XOR operation between two vectors.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Int16x8) Xor(y Int16x8) Int16x8
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAdd231(y Float64x2, z Float64x2) Float64x2
-// Add adds corresponding elements of two vectors.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x16) Add(y Int32x16) Int32x16
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAdd231(y Float64x4, z Float64x4) Float64x4
-// And performs a masked bitwise AND operation between two vectors.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x16) And(y Int32x16) Int32x16
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAdd231(y Float64x8, z Float64x8) Float64x8
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x16) AndNot(y Int32x16) Int32x16
+/* FusedMultiplyAddSub132 */
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x16) Equal(y Int32x16) Mask32x16
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAddSub132(y Float32x4, z Float32x4) Float32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x16) Greater(y Int32x16) Mask32x16
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAddSub132(y Float32x8, z Float32x8) Float32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAddSub132(y Float32x16, z Float32x16) Float32x16
-// Less compares for less than.
-// Const Immediate = 1.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) Less(y Int32x16) Mask32x16
+// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAddSub132(y Float64x2, z Float64x2) Float64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) LessEqual(y Int32x16) Mask32x16
+// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAddSub132(y Float64x4, z Float64x4) Float64x4
-// Absolute computes the absolute value of each element.
+// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPABSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAbsolute(y Mask32x16) Int32x16
+// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAddSub132(y Float64x8, z Float64x8) Float64x8
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedPopCount(y Mask32x16) Int32x16
+/* FusedMultiplyAddSub213 */
-// Max computes the maximum of corresponding elements.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x16) Max(y Int32x16) Int32x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAddSub213(y Float32x4, z Float32x4) Float32x4
-// Min computes the minimum of corresponding elements.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x16) Min(y Int32x16) Int32x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAddSub213(y Float32x8, z Float32x8) Float32x8
-// MulLow multiplies elements and stores the low part of the result, masked.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x16) MulLow(y Int32x16) Int32x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAddSub213(y Float32x16, z Float32x16) Float32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) NotEqual(y Int32x16) Mask32x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAddSub213(y Float64x2, z Float64x2) Float64x2
-// Or performs a masked bitwise OR operation between two vectors.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x16) Or(y Int32x16) Int32x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAddSub213(y Float64x4, z Float64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x16) Sub(y Int32x16) Int32x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAddSub213(y Float64x8, z Float64x8) Float64x8
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x16) Xor(y Int32x16) Int32x16
+/* FusedMultiplyAddSub231 */
-// Add adds corresponding elements of two vectors.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPADDD, CPU Feature: AVX
-func (x Int32x4) Add(y Int32x4) Int32x4
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplyAddSub231(y Float32x4, z Float32x4) Float32x4
-// And performs a bitwise AND operation between two vectors.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Int32x4) And(y Int32x4) Int32x4
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplyAddSub231(y Float32x8, z Float32x8) Float32x8
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int32x4) AndNot(y Int32x4) Int32x4
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplyAddSub231(y Float32x16, z Float32x16) Float32x16
-// Equal compares for equality.
-// Const Immediate = 0.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPCMPEQD, CPU Feature: AVX
-func (x Int32x4) Equal(y Int32x4) Mask32x4
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplyAddSub231(y Float64x2, z Float64x2) Float64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPCMPGTD, CPU Feature: AVX
-func (x Int32x4) Greater(y Int32x4) Mask32x4
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplyAddSub231(y Float64x4, z Float64x4) Float64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// FusedMultiplyAddSub231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplyAddSub231(y Float64x8, z Float64x8) Float64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) Less(y Int32x4) Mask32x4
+/* FusedMultiplySub132 */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) LessEqual(y Int32x4) Mask32x4
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySub132(y Float32x4, z Float32x4) Float32x4
-// Absolute computes the absolute value of each element.
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPABSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAbsolute(y Mask32x4) Int32x4
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySub132(y Float32x8, z Float32x8) Float32x8
-// PopCount counts the number of set bits in each element.
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedPopCount(y Mask32x4) Int32x4
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySub132(y Float32x16, z Float32x16) Float32x16
-// Max computes the maximum of corresponding elements.
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPMAXSD, CPU Feature: AVX
-func (x Int32x4) Max(y Int32x4) Int32x4
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySub132(y Float64x2, z Float64x2) Float64x2
-// Min computes the minimum of corresponding elements.
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPMINSD, CPU Feature: AVX
-func (x Int32x4) Min(y Int32x4) Int32x4
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySub132(y Float64x4, z Float64x4) Float64x4
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// FusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VPMULDQ, CPU Feature: AVX
-func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySub132(y Float64x8, z Float64x8) Float64x8
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Int32x4) MulLow(y Int32x4) Int32x4
+/* FusedMultiplySub213 */
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) NotEqual(y Int32x4) Mask32x4
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySub213(y Float32x4, z Float32x4) Float32x4
-// Or performs a bitwise OR operation between two vectors.
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int32x4) Or(y Int32x4) Int32x4
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySub213(y Float32x8, z Float32x8) Float32x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySub213(y Float32x16, z Float32x16) Float32x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPHSUBD, CPU Feature: AVX
-func (x Int32x4) PairwiseSub(y Int32x4) Int32x4
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySub213(y Float64x2, z Float64x2) Float64x2
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPSIGND, CPU Feature: AVX
-func (x Int32x4) Sign(y Int32x4) Int32x4
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySub213(y Float64x4, z Float64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VPSUBD, CPU Feature: AVX
-func (x Int32x4) Sub(y Int32x4) Int32x4
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySub213(y Float64x8, z Float64x8) Float64x8
-// Xor performs a bitwise XOR operation between two vectors.
-//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Int32x4) Xor(y Int32x4) Int32x4
+/* FusedMultiplySub231 */
-// Add adds corresponding elements of two vectors.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPADDD, CPU Feature: AVX2
-func (x Int32x8) Add(y Int32x8) Int32x8
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySub231(y Float32x4, z Float32x4) Float32x4
-// And performs a bitwise AND operation between two vectors.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Int32x8) And(y Int32x8) Int32x8
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySub231(y Float32x8, z Float32x8) Float32x8
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int32x8) AndNot(y Int32x8) Int32x8
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySub231(y Float32x16, z Float32x16) Float32x16
-// Equal compares for equality.
-// Const Immediate = 0.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPCMPEQD, CPU Feature: AVX2
-func (x Int32x8) Equal(y Int32x8) Mask32x8
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySub231(y Float64x2, z Float64x2) Float64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPCMPGTD, CPU Feature: AVX2
-func (x Int32x8) Greater(y Int32x8) Mask32x8
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySub231(y Float64x4, z Float64x4) Float64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// FusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySub231(y Float64x8, z Float64x8) Float64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) Less(y Int32x8) Mask32x8
+/* FusedMultiplySubAdd132 */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) LessEqual(y Int32x8) Mask32x8
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySubAdd132(y Float32x4, z Float32x4) Float32x4
-// Absolute computes the absolute value of each element.
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPABSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAbsolute(y Mask32x8) Int32x8
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySubAdd132(y Float32x8, z Float32x8) Float32x8
-// PopCount counts the number of set bits in each element.
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedPopCount(y Mask32x8) Int32x8
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySubAdd132(y Float32x16, z Float32x16) Float32x16
-// Max computes the maximum of corresponding elements.
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPMAXSD, CPU Feature: AVX2
-func (x Int32x8) Max(y Int32x8) Int32x8
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySubAdd132(y Float64x2, z Float64x2) Float64x2
-// Min computes the minimum of corresponding elements.
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPMINSD, CPU Feature: AVX2
-func (x Int32x8) Min(y Int32x8) Int32x8
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySubAdd132(y Float64x4, z Float64x4) Float64x4
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// FusedMultiplySubAdd132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VPMULDQ, CPU Feature: AVX2
-func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySubAdd132(y Float64x8, z Float64x8) Float64x8
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Int32x8) MulLow(y Int32x8) Int32x8
+/* FusedMultiplySubAdd213 */
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) NotEqual(y Int32x8) Mask32x8
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySubAdd213(y Float32x4, z Float32x4) Float32x4
-// Or performs a bitwise OR operation between two vectors.
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int32x8) Or(y Int32x8) Int32x8
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySubAdd213(y Float32x8, z Float32x8) Float32x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySubAdd213(y Float32x16, z Float32x16) Float32x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Int32x8) PairwiseSub(y Int32x8) Int32x8
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySubAdd213(y Float64x2, z Float64x2) Float64x2
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPSIGND, CPU Feature: AVX2
-func (x Int32x8) Sign(y Int32x8) Int32x8
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySubAdd213(y Float64x4, z Float64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FusedMultiplySubAdd213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VPSUBD, CPU Feature: AVX2
-func (x Int32x8) Sub(y Int32x8) Int32x8
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySubAdd213(y Float64x8, z Float64x8) Float64x8
-// Xor performs a bitwise XOR operation between two vectors.
+/* FusedMultiplySubAdd231 */
+
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Int32x8) Xor(y Int32x8) Int32x8
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedMultiplySubAdd231(y Float32x4, z Float32x4) Float32x4
-// Add adds corresponding elements of two vectors.
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPADDQ, CPU Feature: AVX
-func (x Int64x2) Add(y Int64x2) Int64x2
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedMultiplySubAdd231(y Float32x8, z Float32x8) Float32x8
-// And performs a bitwise AND operation between two vectors.
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Int64x2) And(y Int64x2) Int64x2
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedMultiplySubAdd231(y Float32x16, z Float32x16) Float32x16
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int64x2) AndNot(y Int64x2) Int64x2
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedMultiplySubAdd231(y Float64x2, z Float64x2) Float64x2
-// Equal compares for equality.
-// Const Immediate = 0.
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX
-func (x Int64x2) Equal(y Int64x2) Mask64x2
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedMultiplySubAdd231(y Float64x4, z Float64x4) Float64x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedMultiplySubAdd231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Greater(y Int64x2) Mask64x2
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedMultiplySubAdd231(y Float64x8, z Float64x8) Float64x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+/* FusedNegativeMultiplyAdd132 */
+
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplyAdd132(y Float32x4, z Float32x4) Float32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Less(y Int64x2) Mask64x2
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplyAdd132(y Float32x8, z Float32x8) Float32x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) LessEqual(y Int64x2) Mask64x2
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplyAdd132(y Float32x16, z Float32x16) Float32x16
-// Absolute computes the absolute value of each element.
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAbsolute(y Mask64x2) Int64x2
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
-// PopCount counts the number of set bits in each element.
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedPopCount(y Mask64x2) Int64x2
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
-// Max computes the maximum of corresponding elements.
+// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Max(y Int64x2) Int64x2
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
-// Min computes the minimum of corresponding elements.
+/* FusedNegativeMultiplyAdd213 */
+
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Min(y Int64x2) Int64x2
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplyAdd213(y Float32x4, z Float32x4) Float32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplyAdd213(y Float32x8, z Float32x8) Float32x8
-// MulLow multiplies elements and stores the low part of the result, masked.
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MulLow(y Int64x2) Int64x2
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplyAdd213(y Float32x16, z Float32x16) Float32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) NotEqual(y Int64x2) Mask64x2
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
-// Or performs a bitwise OR operation between two vectors.
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int64x2) Or(y Int64x2) Int64x2
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VPSUBQ, CPU Feature: AVX
-func (x Int64x2) Sub(y Int64x2) Int64x2
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
-// Xor performs a bitwise XOR operation between two vectors.
+/* FusedNegativeMultiplyAdd231 */
+
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Int64x2) Xor(y Int64x2) Int64x2
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplyAdd231(y Float32x4, z Float32x4) Float32x4
-// Add adds corresponding elements of two vectors.
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPADDQ, CPU Feature: AVX2
-func (x Int64x4) Add(y Int64x4) Int64x4
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplyAdd231(y Float32x8, z Float32x8) Float32x8
-// And performs a bitwise AND operation between two vectors.
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Int64x4) And(y Int64x4) Int64x4
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplyAdd231(y Float32x16, z Float32x16) Float32x16
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int64x4) AndNot(y Int64x4) Int64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplyAdd231(y Float64x2, z Float64x2) Float64x2
-// Equal compares for equality.
-// Const Immediate = 0.
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX2
-func (x Int64x4) Equal(y Int64x4) Mask64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplyAdd231(y Float64x4, z Float64x4) Float64x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX2
-func (x Int64x4) Greater(y Int64x4) Mask64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplyAdd231(y Float64x8, z Float64x8) Float64x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+/* FusedNegativeMultiplySub132 */
+
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplySub132(y Float32x4, z Float32x4) Float32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) Less(y Int64x4) Mask64x4
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplySub132(y Float32x8, z Float32x8) Float32x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) LessEqual(y Int64x4) Mask64x4
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplySub132(y Float32x16, z Float32x16) Float32x16
-// Absolute computes the absolute value of each element.
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedAbsolute(y Mask64x4) Int64x4
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplySub132(y Float64x2, z Float64x2) Float64x2
-// PopCount counts the number of set bits in each element.
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedPopCount(y Mask64x4) Int64x4
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplySub132(y Float64x4, z Float64x4) Float64x4
-// Max computes the maximum of corresponding elements.
+// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) Max(y Int64x4) Int64x4
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplySub132(y Float64x8, z Float64x8) Float64x8
-// Min computes the minimum of corresponding elements.
+/* FusedNegativeMultiplySub213 */
+
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) Min(y Int64x4) Int64x4
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplySub213(y Float32x4, z Float32x4) Float32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplySub213(y Float32x8, z Float32x8) Float32x8
-// MulLow multiplies elements and stores the low part of the result, masked.
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MulLow(y Int64x4) Int64x4
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplySub213(y Float32x16, z Float32x16) Float32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) NotEqual(y Int64x4) Mask64x4
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplySub213(y Float64x2, z Float64x2) Float64x2
-// Or performs a bitwise OR operation between two vectors.
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int64x4) Or(y Int64x4) Int64x4
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplySub213(y Float64x4, z Float64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VPSUBQ, CPU Feature: AVX2
-func (x Int64x4) Sub(y Int64x4) Int64x4
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplySub213(y Float64x8, z Float64x8) Float64x8
-// Xor performs a bitwise XOR operation between two vectors.
+/* FusedNegativeMultiplySub231 */
+
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Int64x4) Xor(y Int64x4) Int64x4
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) FusedNegativeMultiplySub231(y Float32x4, z Float32x4) Float32x4
-// Add adds corresponding elements of two vectors.
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Add(y Int64x8) Int64x8
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) FusedNegativeMultiplySub231(y Float32x8, z Float32x8) Float32x8
-// And performs a masked bitwise AND operation between two vectors.
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) And(y Int64x8) Int64x8
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) FusedNegativeMultiplySub231(y Float32x16, z Float32x16) Float32x16
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Int64x8) AndNot(y Int64x8) Int64x8
+// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) FusedNegativeMultiplySub231(y Float64x2, z Float64x2) Float64x2
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Equal(y Int64x8) Mask64x8
+// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) FusedNegativeMultiplySub231(y Float64x4, z Float64x4) Float64x4
+
+// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+//
+// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) FusedNegativeMultiplySub231(y Float64x8, z Float64x8) Float64x8
+
+/* Greater */
// Greater compares for greater than.
// Const Immediate = 6.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Greater(y Int64x8) Mask64x8
+// Asm: VPCMPGTB, CPU Feature: AVX
+func (x Int8x16) Greater(y Int8x16) Mask8x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
+// Asm: VPCMPGTB, CPU Feature: AVX2
+func (x Int8x32) Greater(y Int8x32) Mask8x32
-// Less compares for less than.
-// Const Immediate = 1.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Less(y Int64x8) Mask64x8
+// Asm: VPCMPGTW, CPU Feature: AVX
+func (x Int16x8) Greater(y Int16x8) Mask16x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) LessEqual(y Int64x8) Mask64x8
+// Asm: VPCMPGTW, CPU Feature: AVX2
+func (x Int16x16) Greater(y Int16x16) Mask16x16
-// Absolute computes the absolute value of each element.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedAbsolute(y Mask64x8) Int64x8
+// Asm: VPCMPGTD, CPU Feature: AVX
+func (x Int32x4) Greater(y Int32x4) Mask32x4
-// PopCount counts the number of set bits in each element.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedPopCount(y Mask64x8) Int64x8
+// Asm: VPCMPGTD, CPU Feature: AVX2
+func (x Int32x8) Greater(y Int32x8) Mask32x8
-// Max computes the maximum of corresponding elements.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Max(y Int64x8) Int64x8
+// Asm: VPCMPGTQ, CPU Feature: AVX2
+func (x Int64x4) Greater(y Int64x4) Mask64x4
-// Min computes the minimum of corresponding elements.
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Min(y Int64x8) Int64x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) Greater(y Float32x4) Mask32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) Greater(y Float32x8) Mask32x8
-// MulLow multiplies elements and stores the low part of the result, masked.
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MulLow(y Int64x8) Int64x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Greater(y Float32x16) Mask32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) NotEqual(y Int64x8) Mask64x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) Greater(y Float64x2) Mask64x2
-// Or performs a masked bitwise OR operation between two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Or(y Int64x8) Int64x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) Greater(y Float64x4) Mask64x4
-// Sub subtracts corresponding elements of two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6 (NLE_US: also true when either element is NaN).
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Sub(y Int64x8) Int64x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Greater(y Float64x8) Mask64x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Int64x8) Xor(y Int64x8) Int64x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) Greater(y Int8x64) Mask8x64
-// Add adds corresponding elements of two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPADDB, CPU Feature: AVX
-func (x Int8x16) Add(y Int8x16) Int8x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) Greater(y Int16x32) Mask16x32
-// And performs a bitwise AND operation between two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Int8x16) And(y Int8x16) Int8x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) Greater(y Int32x16) Mask32x16
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int8x16) AndNot(y Int8x16) Int8x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) Greater(y Int64x2) Mask64x2
-// Equal compares for equality.
-// Const Immediate = 0.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPEQB, CPU Feature: AVX
-func (x Int8x16) Equal(y Int8x16) Mask8x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Greater(y Int64x8) Mask64x8
// Greater compares for greater than.
// Const Immediate = 6.
//
-// Asm: VPCMPGTB, CPU Feature: AVX
-func (x Int8x16) Greater(y Int8x16) Mask8x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) Greater(y Uint8x16) Mask8x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) Greater(y Uint8x32) Mask8x32
-// Less compares for less than.
-// Const Immediate = 1.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) Less(y Int8x16) Mask8x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Greater(y Uint8x64) Mask8x64
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) LessEqual(y Int8x16) Mask8x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) Greater(y Uint16x8) Mask16x8
-// Absolute computes the absolute value of each element.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPABSB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedAbsolute(y Mask8x16) Int8x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) Greater(y Uint16x16) Mask16x16
-// PopCount counts the number of set bits in each element.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedPopCount(y Mask8x16) Int8x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Greater(y Uint16x32) Mask16x32
-// Max computes the maximum of corresponding elements.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMAXSB, CPU Feature: AVX
-func (x Int8x16) Max(y Int8x16) Int8x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) Greater(y Uint32x4) Mask32x4
-// Min computes the minimum of corresponding elements.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMINSB, CPU Feature: AVX
-func (x Int8x16) Min(y Int8x16) Int8x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) Greater(y Uint32x8) Mask32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) NotEqual(y Int8x16) Mask8x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Greater(y Uint32x16) Mask32x16
-// Or performs a bitwise OR operation between two vectors.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int8x16) Or(y Int8x16) Int8x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Greater(y Uint64x2) Mask64x2
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Greater(y Uint64x4) Mask64x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Greater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPSUBSB, CPU Feature: AVX
-func (x Int8x16) SaturatedSub(y Int8x16) Int8x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Greater(y Uint64x8) Mask64x8
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGNB, CPU Feature: AVX
-func (x Int8x16) Sign(y Int8x16) Int8x16
+/* GreaterEqual */
-// Sub subtracts corresponding elements of two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPSUBB, CPU Feature: AVX
-func (x Int8x16) Sub(y Int8x16) Int8x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
-// Xor performs a bitwise XOR operation between two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Int8x16) Xor(y Int8x16) Int8x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
-// Add adds corresponding elements of two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPADDB, CPU Feature: AVX2
-func (x Int8x32) Add(y Int8x32) Int8x32
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
-// And performs a bitwise AND operation between two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Int8x32) And(y Int8x32) Int8x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int8x32) AndNot(y Int8x32) Int8x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
-// Equal compares for equality.
-// Const Immediate = 0.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5 (NLT_US: also true when either element is NaN).
//
-// Asm: VPCMPEQB, CPU Feature: AVX2
-func (x Int8x32) Equal(y Int8x32) Mask8x32
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPGTB, CPU Feature: AVX2
-func (x Int8x32) Greater(y Int8x32) Mask8x32
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
// GreaterEqual compares for greater than or equal.
// Const Immediate = 5.
// Asm: VPCMPB, CPU Feature: AVX512EVEX
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
-// Less compares for less than.
-// Const Immediate = 1.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) Less(y Int8x32) Mask8x32
+func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) LessEqual(y Int8x32) Mask8x32
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
-// Absolute computes the absolute value of each element.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPABSB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedAbsolute(y Mask8x32) Int8x32
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
-// PopCount counts the number of set bits in each element.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedPopCount(y Mask8x32) Int8x32
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
-// Max computes the maximum of corresponding elements.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMAXSB, CPU Feature: AVX2
-func (x Int8x32) Max(y Int8x32) Int8x32
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
-// Min computes the minimum of corresponding elements.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMINSB, CPU Feature: AVX2
-func (x Int8x32) Min(y Int8x32) Int8x32
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) NotEqual(y Int8x32) Mask8x32
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
-// Or performs a bitwise OR operation between two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int8x32) Or(y Int8x32) Int8x32
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBSB, CPU Feature: AVX2
-func (x Int8x32) SaturatedSub(y Int8x32) Int8x32
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSIGNB, CPU Feature: AVX2
-func (x Int8x32) Sign(y Int8x32) Int8x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
-// Sub subtracts corresponding elements of two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBB, CPU Feature: AVX2
-func (x Int8x32) Sub(y Int8x32) Int8x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
-// Xor performs a bitwise XOR operation between two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Int8x32) Xor(y Int8x32) Int8x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
-// Add adds corresponding elements of two vectors.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Int8x64) Add(y Int8x64) Int8x64
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPEQB, CPU Feature: AVX512EVEX
-func (x Int8x64) Equal(y Int8x64) Mask8x64
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPGTB, CPU Feature: AVX512EVEX
-func (x Int8x64) Greater(y Int8x64) Mask8x64
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
// GreaterEqual compares for greater than or equal.
// Const Immediate = 5.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) Less(y Int8x64) Mask8x64
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) LessEqual(y Int8x64) Mask8x64
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
-// Absolute computes the absolute value of each element.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPABSB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedAbsolute(y Mask8x64) Int8x64
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
-// PopCount counts the number of set bits in each element.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedPopCount(y Mask8x64) Int8x64
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
-// Max computes the maximum of corresponding elements.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
-func (x Int8x64) Max(y Int8x64) Int8x64
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
-// Min computes the minimum of corresponding elements.
+// GreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
-func (x Int8x64) Min(y Int8x64) Int8x64
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) NotEqual(y Int8x64) Mask8x64
+/* IsNan */
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) IsNan(y Float32x4) Mask32x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Int8x64) SaturatedSub(y Int8x64) Int8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) IsNan(y Float32x8) Mask32x8
-// Sub subtracts corresponding elements of two vectors.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Int8x64) Sub(y Int8x64) Int8x64
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) IsNan(y Float32x16) Mask32x16
-// Add adds corresponding elements of two vectors.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPADDW, CPU Feature: AVX2
-func (x Uint16x16) Add(y Uint16x16) Uint16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNan(y Float64x2) Mask64x2
-// And performs a bitwise AND operation between two vectors.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Uint16x16) And(y Uint16x16) Uint16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNan(y Float64x4) Mask64x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// IsNan reports, per element, whether x or y is NaN. To test one vector, use x.IsNan(x).
+// Const Immediate = 3.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) IsNan(y Float64x8) Mask64x8
-// Average computes the rounded average of corresponding elements.
-//
-// Asm: VPAVGW, CPU Feature: AVX2
-func (x Uint16x16) Average(y Uint16x16) Uint16x16
+/* Less */
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) Equal(y Uint16x16) Mask16x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) Less(y Float32x4) Mask32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) Greater(y Uint16x16) Mask16x16
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) Less(y Float32x8) Mask32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Less(y Float32x16) Mask32x16
// Less compares for less than.
// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) Less(y Uint16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) Less(y Float64x2) Mask64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) Less(y Float64x4) Mask64x4
-// PopCount counts the number of set bits in each element.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedPopCount(y Mask16x16) Uint16x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Less(y Float64x8) Mask64x8
-// Max computes the maximum of corresponding elements.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMAXUW, CPU Feature: AVX2
-func (x Uint16x16) Max(y Uint16x16) Uint16x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) Less(y Int8x16) Mask8x16
-// Min computes the minimum of corresponding elements.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMINUW, CPU Feature: AVX2
-func (x Uint16x16) Min(y Uint16x16) Uint16x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) Less(y Int8x32) Mask8x32
-// MulHigh multiplies elements and stores the high part of the result.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMULHUW, CPU Feature: AVX2
-func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) Less(y Int8x64) Mask8x64
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) Less(y Int16x8) Mask16x8
-// Or performs a bitwise OR operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint16x16) Or(y Uint16x16) Uint16x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) Less(y Int16x16) Mask16x16
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) Less(y Int16x32) Mask16x32
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) Less(y Int32x4) Mask32x4
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) Less(y Int32x8) Mask32x8
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPSUBSW, CPU Feature: AVX2
-func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) Less(y Int32x16) Mask32x16
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) Less(y Int64x2) Mask64x2
-// Sub subtracts corresponding elements of two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPSUBW, CPU Feature: AVX2
-func (x Uint16x16) Sub(y Uint16x16) Uint16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) Less(y Int64x4) Mask64x4
-// Xor performs a bitwise XOR operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Uint16x16) Xor(y Uint16x16) Uint16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Less(y Int64x8) Mask64x8
-// Add adds corresponding elements of two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Add(y Uint16x32) Uint16x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) Less(y Uint8x16) Mask8x16
-// Average computes the rounded average of corresponding elements.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Average(y Uint16x32) Uint16x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) Less(y Uint8x32) Mask8x32
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Equal(y Uint16x32) Mask16x32
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Less(y Uint8x64) Mask8x64
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Less compares for less than.
+// Const Immediate = 1.
//
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Greater(y Uint16x32) Mask16x32
+func (x Uint16x8) Less(y Uint16x8) Mask16x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Less compares for less than.
+// Const Immediate = 1.
//
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
+func (x Uint16x16) Less(y Uint16x16) Mask16x16
// Less compares for less than.
// Const Immediate = 1.
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
func (x Uint16x32) Less(y Uint16x32) Mask16x32
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedPopCount(y Mask16x32) Uint16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Max(y Uint16x32) Uint16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Min(y Uint16x32) Uint16x32
-
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) Less(y Uint32x4) Mask32x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) Less(y Uint32x8) Mask32x8
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Less(y Uint32x16) Mask32x16
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Less(y Uint64x2) Mask64x2
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Less(y Uint64x4) Mask64x4
-// Sub subtracts corresponding elements of two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Uint16x32) Sub(y Uint16x32) Uint16x32
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Less(y Uint64x8) Mask64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX
-func (x Uint16x8) Add(y Uint16x8) Uint16x8
+/* LessEqual */
-// And performs a bitwise AND operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Uint16x8) And(y Uint16x8) Uint16x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) LessEqual(y Float32x4) Mask32x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) LessEqual(y Float32x8) Mask32x8
-// Average computes the rounded average of corresponding elements.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPAVGW, CPU Feature: AVX
-func (x Uint16x8) Average(y Uint16x8) Uint16x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) LessEqual(y Float32x16) Mask32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) Equal(y Uint16x8) Mask16x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) LessEqual(y Float64x2) Mask64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) Greater(y Uint16x8) Mask16x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) LessEqual(y Float64x4) Mask64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) LessEqual(y Float64x8) Mask64x8
-// Less compares for less than.
-// Const Immediate = 1.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) Less(y Uint16x8) Mask16x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) LessEqual(y Int8x16) Mask8x16
// LessEqual compares for less than or equal.
// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) LessEqual(y Int8x32) Mask8x32
-// PopCount counts the number of set bits in each element.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedPopCount(y Mask16x8) Uint16x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) LessEqual(y Int8x64) Mask8x64
-// Max computes the maximum of corresponding elements.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMAXUW, CPU Feature: AVX
-func (x Uint16x8) Max(y Uint16x8) Uint16x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) LessEqual(y Int16x8) Mask16x8
-// Min computes the minimum of corresponding elements.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMINUW, CPU Feature: AVX
-func (x Uint16x8) Min(y Uint16x8) Uint16x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) LessEqual(y Int16x16) Mask16x16
-// MulHigh multiplies elements and stores the high part of the result.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMULHUW, CPU Feature: AVX
-func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) LessEqual(y Int16x32) Mask16x32
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) LessEqual(y Int32x4) Mask32x4
-// Or performs a bitwise OR operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint16x8) Or(y Uint16x8) Uint16x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) LessEqual(y Int32x8) Mask32x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) LessEqual(y Int32x16) Mask32x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) LessEqual(y Int64x2) Mask64x2
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) LessEqual(y Int64x4) Mask64x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPSUBSW, CPU Feature: AVX
-func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) LessEqual(y Int64x8) Mask64x8
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
-// Sub subtracts corresponding elements of two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPSUBW, CPU Feature: AVX
-func (x Uint16x8) Sub(y Uint16x8) Uint16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
-// Xor performs a bitwise XOR operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Uint16x8) Xor(y Uint16x8) Uint16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
-// Add adds corresponding elements of two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Add(y Uint32x16) Uint32x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
-// And performs a masked bitwise AND operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Uint32x16) And(y Uint32x16) Uint32x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Equal(y Uint32x16) Mask32x16
+func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Greater(y Uint32x16) Mask32x16
+func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
+func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
-// Less compares for less than.
-// Const Immediate = 1.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Less(y Uint32x16) Mask32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
// LessEqual compares for less than or equal.
// Const Immediate = 2.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
-// PopCount counts the number of set bits in each element.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedPopCount(y Mask32x16) Uint32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Max(y Uint32x16) Uint32x16
+/* MaskedAbsolute */
-// Min computes the minimum of corresponding elements.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Min(y Uint32x16) Uint32x16
+// Asm: VPABSB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedAbsolute(y Mask8x16) Int8x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
+// Asm: VPABSB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedAbsolute(y Mask8x32) Int8x32
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Or(y Uint32x16) Uint32x16
+// Asm: VPABSB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedAbsolute(y Mask8x64) Int8x64
-// Sub subtracts corresponding elements of two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Sub(y Uint32x16) Uint32x16
+// Asm: VPABSW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedAbsolute(y Mask16x8) Int16x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Uint32x16) Xor(y Uint32x16) Uint32x16
+// Asm: VPABSW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedAbsolute(y Mask16x16) Int16x16
-// Add adds corresponding elements of two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPADDD, CPU Feature: AVX
-func (x Uint32x4) Add(y Uint32x4) Uint32x4
+// Asm: VPABSW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedAbsolute(y Mask16x32) Int16x32
-// And performs a bitwise AND operation between two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Uint32x4) And(y Uint32x4) Uint32x4
+// Asm: VPABSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedAbsolute(y Mask32x4) Int32x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
+// Asm: VPABSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedAbsolute(y Mask32x8) Int32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) Equal(y Uint32x4) Mask32x4
+// Asm: VPABSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedAbsolute(y Mask32x16) Int32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) Greater(y Uint32x4) Mask32x4
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedAbsolute(y Mask64x2) Int64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedAbsolute(y Mask64x4) Int64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedAbsolute computes the absolute value of each element, masked by y.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) Less(y Uint32x4) Mask32x4
+// Asm: VPABSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedAbsolute(y Mask64x8) Int64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
+/* MaskedAdd */
-// PopCount counts the number of set bits in each element.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedPopCount(y Mask32x4) Uint32x4
+// Asm: VADDPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedAdd(y Float32x4, z Mask32x4) Float32x4
-// Max computes the maximum of corresponding elements.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMAXUD, CPU Feature: AVX
-func (x Uint32x4) Max(y Uint32x4) Uint32x4
+// Asm: VADDPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedAdd(y Float32x8, z Mask32x8) Float32x8
-// Min computes the minimum of corresponding elements.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMINUD, CPU Feature: AVX
-func (x Uint32x4) Min(y Uint32x4) Uint32x4
+// Asm: VADDPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedAdd(y Float32x16, z Mask32x16) Float32x16
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMULUDQ, CPU Feature: AVX
-func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+// Asm: VADDPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedAdd(y Float64x2, z Mask64x2) Float64x2
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
+// Asm: VADDPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedAdd(y Float64x4, z Mask64x4) Float64x4
-// Or performs a bitwise OR operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint32x4) Or(y Uint32x4) Uint32x4
+// Asm: VADDPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedAdd(y Float64x8, z Mask64x8) Float64x8
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedAdd(y Int8x16, z Mask8x16) Int8x16
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPHSUBD, CPU Feature: AVX
-func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedAdd(y Int8x32, z Mask8x32) Int8x32
-// Sub subtracts corresponding elements of two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPSUBD, CPU Feature: AVX
-func (x Uint32x4) Sub(y Uint32x4) Uint32x4
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedAdd(y Int8x64, z Mask8x64) Int8x64
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Uint32x4) Xor(y Uint32x4) Uint32x4
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedAdd(y Int16x8, z Mask16x8) Int16x8
// Add adds corresponding elements of two vectors.
//
-// Asm: VPADDD, CPU Feature: AVX2
-func (x Uint32x8) Add(y Uint32x8) Uint32x8
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedAdd(y Int16x16, z Mask16x16) Int16x16
-// And performs a bitwise AND operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Uint32x8) And(y Uint32x8) Uint32x8
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedAdd(y Int16x32, z Mask16x32) Int16x32
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedAdd(y Int32x4, z Mask32x4) Int32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) Equal(y Uint32x8) Mask32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedAdd(y Int32x8, z Mask32x8) Int32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) Greater(y Uint32x8) Mask32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedAdd(y Int32x16, z Mask32x16) Int32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedAdd(y Int64x2, z Mask64x2) Int64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) Less(y Uint32x8) Mask32x8
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedAdd(y Int64x4, z Mask64x4) Int64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedAdd(y Int64x8, z Mask64x8) Int64x8
-// PopCount counts the number of set bits in each element.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedPopCount(y Mask32x8) Uint32x8
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedAdd(y Uint8x16, z Mask8x16) Uint8x16
-// Max computes the maximum of corresponding elements.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMAXUD, CPU Feature: AVX2
-func (x Uint32x8) Max(y Uint32x8) Uint32x8
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedAdd(y Uint8x32, z Mask8x32) Uint8x32
-// Min computes the minimum of corresponding elements.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMINUD, CPU Feature: AVX2
-func (x Uint32x8) Min(y Uint32x8) Uint32x8
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedAdd(y Uint8x64, z Mask8x64) Uint8x64
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPMULUDQ, CPU Feature: AVX2
-func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedAdd(y Uint16x8, z Mask16x8) Uint16x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedAdd(y Uint16x16, z Mask16x16) Uint16x16
-// Or performs a bitwise OR operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint32x8) Or(y Uint32x8) Uint32x8
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedAdd(y Uint16x32, z Mask16x32) Uint16x32
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedAdd(y Uint32x4, z Mask32x4) Uint32x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPSUBD, CPU Feature: AVX2
-func (x Uint32x8) Sub(y Uint32x8) Uint32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedAdd(y Uint32x8, z Mask32x8) Uint32x8
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Uint32x8) Xor(y Uint32x8) Uint32x8
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedAdd(y Uint32x16, z Mask32x16) Uint32x16
// Add adds corresponding elements of two vectors.
//
-// Asm: VPADDQ, CPU Feature: AVX
-func (x Uint64x2) Add(y Uint64x2) Uint64x2
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedAdd(y Uint64x2, z Mask64x2) Uint64x2
-// And performs a bitwise AND operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Uint64x2) And(y Uint64x2) Uint64x2
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedAdd(y Uint64x4, z Mask64x4) Uint64x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// MaskedAdd adds corresponding elements of two vectors, masked by z.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedAdd(y Uint64x8, z Mask64x8) Uint64x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Equal(y Uint64x2) Mask64x2
+/* MaskedAnd */
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Greater(y Uint64x2) Mask64x2
+// Asm: VANDPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedAnd(y Float32x4, z Mask32x4) Float32x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
+// Asm: VANDPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedAnd(y Float32x8, z Mask32x8) Float32x8
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Less(y Uint64x2) Mask64x2
+// Asm: VANDPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedAnd(y Float32x16, z Mask32x16) Float32x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
+// Asm: VANDPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedAnd(y Float64x2, z Mask64x2) Float64x2
-// PopCount counts the number of set bits in each element.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedPopCount(y Mask64x2) Uint64x2
+// Asm: VANDPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedAnd(y Float64x4, z Mask64x4) Float64x4
-// Max computes the maximum of corresponding elements.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Max(y Uint64x2) Uint64x2
+// Asm: VANDPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedAnd(y Float64x8, z Mask64x8) Float64x8
-// Min computes the minimum of corresponding elements.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Min(y Uint64x2) Uint64x2
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedAnd(y Int32x4, z Mask32x4) Int32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedAnd(y Int32x8, z Mask32x8) Int32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedAnd(y Int32x16, z Mask32x16) Int32x16
-// Or performs a bitwise OR operation between two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint64x2) Or(y Uint64x2) Uint64x2
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedAnd(y Int64x2, z Mask64x2) Int64x2
-// Sub subtracts corresponding elements of two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPSUBQ, CPU Feature: AVX
-func (x Uint64x2) Sub(y Uint64x2) Uint64x2
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedAnd(y Int64x4, z Mask64x4) Int64x4
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Uint64x2) Xor(y Uint64x2) Uint64x2
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedAnd(y Int64x8, z Mask64x8) Int64x8
-// Add adds corresponding elements of two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPADDQ, CPU Feature: AVX2
-func (x Uint64x4) Add(y Uint64x4) Uint64x4
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedAnd(y Uint32x4, z Mask32x4) Uint32x4
-// And performs a bitwise AND operation between two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Uint64x4) And(y Uint64x4) Uint64x4
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedAnd(y Uint32x8, z Mask32x8) Uint32x8
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedAnd(y Uint32x16, z Mask32x16) Uint32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Equal(y Uint64x4) Mask64x4
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedAnd(y Uint64x2, z Mask64x2) Uint64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Greater(y Uint64x4) Mask64x4
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedAnd(y Uint64x4, z Mask64x4) Uint64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedAnd performs a masked bitwise AND operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedAnd(y Uint64x8, z Mask64x8) Uint64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Less(y Uint64x4) Mask64x4
+/* MaskedAndNot */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
+// Asm: VANDNPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedAndNot(y Float32x4, z Mask32x4) Float32x4
-// PopCount counts the number of set bits in each element.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4
+// Asm: VANDNPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedAndNot(y Float32x8, z Mask32x8) Float32x8
-// Max computes the maximum of corresponding elements.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Max(y Uint64x4) Uint64x4
+// Asm: VANDNPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedAndNot(y Float32x16, z Mask32x16) Float32x16
-// Min computes the minimum of corresponding elements.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Min(y Uint64x4) Uint64x4
+// Asm: VANDNPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedAndNot(y Float64x2, z Mask64x2) Float64x2
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4
+// Asm: VANDNPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedAndNot(y Float64x4, z Mask64x4) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
+// Asm: VANDNPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedAndNot(y Float64x8, z Mask64x8) Float64x8
-// Or performs a bitwise OR operation between two vectors.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint64x4) Or(y Uint64x4) Uint64x4
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedAndNot(y Int32x4, z Mask32x4) Int32x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPSUBQ, CPU Feature: AVX2
-func (x Uint64x4) Sub(y Uint64x4) Uint64x4
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedAndNot(y Int32x8, z Mask32x8) Int32x8
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Uint64x4) Xor(y Uint64x4) Uint64x4
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedAndNot(y Int32x16, z Mask32x16) Int32x16
-// Add adds corresponding elements of two vectors.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Add(y Uint64x8) Uint64x8
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedAndNot(y Int64x2, z Mask64x2) Int64x2
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) And(y Uint64x8) Uint64x8
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedAndNot(y Int64x4, z Mask64x4) Int64x4
// AndNot performs a masked bitwise AND NOT operation between two vectors.
//
// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
+func (x Int64x8) MaskedAndNot(y Int64x8, z Mask64x8) Int64x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Equal(y Uint64x8) Mask64x8
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedAndNot(y Uint32x4, z Mask32x4) Uint32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Greater(y Uint64x8) Mask64x8
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedAndNot(y Uint32x8, z Mask32x8) Uint32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedAndNot(y Uint32x16, z Mask32x16) Uint32x16
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Less(y Uint64x8) Mask64x8
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedAndNot(y Uint64x2, z Mask64x2) Uint64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedAndNot(y Uint64x4, z Mask64x4) Uint64x4
-// PopCount counts the number of set bits in each element.
+// MaskedAndNot performs a masked bitwise AND NOT operation between two vectors.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedAndNot(y Uint64x8, z Mask64x8) Uint64x8
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Max(y Uint64x8) Uint64x8
+/* MaskedApproximateReciprocal */
-// Min computes the minimum of corresponding elements.
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Min(y Uint64x8) Uint64x8
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedApproximateReciprocal(y Mask32x4) Float32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedApproximateReciprocal(y Mask32x8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedApproximateReciprocal(y Mask32x16) Float32x16
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Or(y Uint64x8) Uint64x8
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedApproximateReciprocal(y Mask64x2) Float64x2
-// Sub subtracts corresponding elements of two vectors.
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Sub(y Uint64x8) Uint64x8
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedApproximateReciprocal(y Mask64x4) Float64x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// MaskedApproximateReciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) Xor(y Uint64x8) Uint64x8
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedApproximateReciprocal(y Mask64x8) Float64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDB, CPU Feature: AVX
-func (x Uint8x16) Add(y Uint8x16) Uint8x16
+/* MaskedApproximateReciprocalOfSqrt */
-// And performs a bitwise AND operation between two vectors.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPAND, CPU Feature: AVX
-func (x Uint8x16) And(y Uint8x16) Uint8x16
+// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedApproximateReciprocalOfSqrt(y Mask32x4) Float32x4
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
+// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedApproximateReciprocalOfSqrt(y Mask32x8) Float32x8
-// Average computes the rounded average of corresponding elements.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPAVGB, CPU Feature: AVX
-func (x Uint8x16) Average(y Uint8x16) Uint8x16
+// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedApproximateReciprocalOfSqrt(y Mask32x16) Float32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) Equal(y Uint8x16) Mask8x16
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedApproximateReciprocalOfSqrt(y Mask64x2) Float64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) Greater(y Uint8x16) Mask8x16
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedApproximateReciprocalOfSqrt(y Mask64x4) Float64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedApproximateReciprocalOfSqrt(y Mask64x8) Float64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) Less(y Uint8x16) Mask8x16
+/* MaskedAverage */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
+// Asm: VPAVGB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedAverage(y Uint8x16, z Mask8x16) Uint8x16
-// PopCount counts the number of set bits in each element.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedPopCount(y Mask8x16) Uint8x16
+// Asm: VPAVGB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedAverage(y Uint8x32, z Mask8x32) Uint8x32
-// Max computes the maximum of corresponding elements.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPMAXUB, CPU Feature: AVX
-func (x Uint8x16) Max(y Uint8x16) Uint8x16
+// Asm: VPAVGB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedAverage(y Uint8x64, z Mask8x64) Uint8x64
-// Min computes the minimum of corresponding elements.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPMINUB, CPU Feature: AVX
-func (x Uint8x16) Min(y Uint8x16) Uint8x16
+// Asm: VPAVGW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedAverage(y Uint16x8, z Mask16x8) Uint16x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
+// Asm: VPAVGW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedAverage(y Uint16x16, z Mask16x16) Uint16x16
-// Or performs a bitwise OR operation between two vectors.
+// MaskedAverage computes the rounded average of corresponding elements.
//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint8x16) Or(y Uint8x16) Uint8x16
+// Asm: VPAVGW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedAverage(y Uint16x32, z Mask16x32) Uint16x32
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16
+/* MaskedCeilSuppressExceptionWithPrecision */
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPSUBSB, CPU Feature: AVX
-func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX
-func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// Sub subtracts corresponding elements of two vectors.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPSUBB, CPU Feature: AVX
-func (x Uint8x16) Sub(y Uint8x16) Uint8x16
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPXOR, CPU Feature: AVX
-func (x Uint8x16) Xor(y Uint8x16) Uint8x16
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Add adds corresponding elements of two vectors.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPADDB, CPU Feature: AVX2
-func (x Uint8x32) Add(y Uint8x32) Uint8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// And performs a bitwise AND operation between two vectors.
+// MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPAND, CPU Feature: AVX2
-func (x Uint8x32) And(y Uint8x32) Uint8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// AndNot performs a bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
+/* MaskedCeilWithPrecision */
-// Average computes the rounded average of corresponding elements.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
+// Const Immediate = 2.
//
-// Asm: VPAVGB, CPU Feature: AVX2
-func (x Uint8x32) Average(y Uint8x32) Uint8x32
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) Equal(y Uint8x32) Mask8x32
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) Greater(y Uint8x32) Mask8x32
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) Less(y Uint8x32) Mask8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
-// LessEqual compares for less than or equal.
+// MaskedCeilWithPrecision rounds elements up with specified precision, masked.
// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedPopCount(y Mask8x32) Uint8x32
+/* MaskedDiffWithCeilSuppressExceptionWithPrecision */
-// Max computes the maximum of corresponding elements.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPMAXUB, CPU Feature: AVX2
-func (x Uint8x32) Max(y Uint8x32) Uint8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Min computes the minimum of corresponding elements.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPMINUB, CPU Feature: AVX2
-func (x Uint8x32) Min(y Uint8x32) Uint8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Or performs a bitwise OR operation between two vectors.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint8x32) Or(y Uint8x32) Uint8x32
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
+// Const Immediate = 10.
//
-// Asm: VPSUBSB, CPU Feature: AVX2
-func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX2
-func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
+/* MaskedDiffWithCeilWithPrecision */
-// Sub subtracts corresponding elements of two vectors.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPSUBB, CPU Feature: AVX2
-func (x Uint8x32) Sub(y Uint8x32) Uint8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Xor performs a bitwise XOR operation between two vectors.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPXOR, CPU Feature: AVX2
-func (x Uint8x32) Xor(y Uint8x32) Uint8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
-// Add adds corresponding elements of two vectors.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Add(y Uint8x64) Uint8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Average computes the rounded average of corresponding elements.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Average(y Uint8x64) Uint8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Equal(y Uint8x64) Mask8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
+// Const Immediate = 2.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Greater(y Uint8x64) Mask8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
+/* MaskedDiffWithFloorSuppressExceptionWithPrecision */
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Less(y Uint8x64) Mask8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// PopCount counts the number of set bits in each element.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedPopCount(y Mask8x64) Uint8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Max computes the maximum of corresponding elements.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Max(y Uint8x64) Uint8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Min computes the minimum of corresponding elements.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Min(y Uint8x64) Uint8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
+// Const Immediate = 9.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64
+/* MaskedDiffWithFloorWithPrecision */
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Uint8x64) Sub(y Uint8x64) Uint8x64
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAdd132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAdd213(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAdd231(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision.
+// Const Immediate = 1.
//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAddSub132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAddSub213(y Float32x16, z Float32x16) Float32x16
+/* MaskedDiffWithRoundSuppressExceptionWithPrecision */
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplyAddSub231(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySub132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySub213(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySub231(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySubAdd132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySubAdd213(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
-//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedMultiplySubAdd231(y Float32x16, z Float32x16) Float32x16
+/* MaskedDiffWithRoundWithPrecision */
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplyAdd132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplyAdd213(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplyAdd231(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplySub132(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplySub213(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) FusedNegativeMultiplySub231(y Float32x16, z Float32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedAdd(y Float32x16, z Mask32x16) Float32x16
+/* MaskedDiffWithTruncSuppressExceptionWithPrecision */
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VANDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedAnd(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VANDNPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedAndNot(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// Div divides elements of two vectors.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiv(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedEqual(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedGreater(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedGreaterEqual(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+/* MaskedDiffWithTruncWithPrecision */
+
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
// Const Immediate = 3.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedIsNan(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedLess(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedLessEqual(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Max computes the maximum of corresponding elements.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedMax(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Min computes the minimum of corresponding elements.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VMINPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedMin(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
-// Mul multiplies corresponding elements of two vectors, masked.
+// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
+// Const Immediate = 3.
//
-// Asm: VMULPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedMul(y Float32x16, z Mask32x16) Float32x16
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
-// MulByPowOf2 multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedMulByPowOf2(y Float32x16, z Mask32x16) Float32x16
+/* MaskedDiv */
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Div divides elements of two vectors.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedNotEqual(y Float32x16, z Mask32x16) Mask32x16
+// Asm: VDIVPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiv(y Float32x4, z Mask32x4) Float32x4
-// Or performs a masked bitwise OR operation between two vectors.
+// Div divides elements of two vectors.
//
-// Asm: VORPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedOr(y Float32x16, z Mask32x16) Float32x16
+// Asm: VDIVPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiv(y Float32x8, z Mask32x8) Float32x8
-// Sub subtracts corresponding elements of two vectors.
+// Div divides elements of two vectors.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedSub(y Float32x16, z Mask32x16) Float32x16
+// Asm: VDIVPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiv(y Float32x16, z Mask32x16) Float32x16
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Div divides elements of two vectors.
//
-// Asm: VXORPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedXor(y Float32x16, z Mask32x16) Float32x16
+// Asm: VDIVPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiv(y Float64x2, z Mask64x2) Float64x2
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// Div divides elements of two vectors.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAdd132(y Float32x4, z Float32x4) Float32x4
+// Asm: VDIVPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiv(y Float64x4, z Mask64x4) Float64x4
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// Div divides elements of two vectors.
//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAdd213(y Float32x4, z Float32x4) Float32x4
+// Asm: VDIVPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiv(y Float64x8, z Mask64x8) Float64x8
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
-//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAdd231(y Float32x4, z Float32x4) Float32x4
+/* MaskedEqual */
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAddSub132(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedEqual(y Float32x4, z Mask32x4) Mask32x4
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAddSub213(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedEqual(y Float32x8, z Mask32x8) Mask32x8
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplyAddSub231(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedEqual(y Float32x16, z Mask32x16) Mask32x16
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySub132(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedEqual(y Float64x2, z Mask64x2) Mask64x2
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySub213(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedEqual(y Float64x4, z Mask64x4) Mask64x4
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySub231(y Float32x4, z Float32x4) Float32x4
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedEqual(y Float64x8, z Mask64x8) Mask64x8
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySubAdd132(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedEqual(y Int8x16, z Mask8x16) Mask8x16
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySubAdd213(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedEqual(y Int8x32, z Mask8x32) Mask8x32
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedMultiplySubAdd231(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedEqual(y Int8x64, z Mask8x64) Mask8x64
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplyAdd132(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedEqual(y Int16x8, z Mask16x8) Mask16x8
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplyAdd213(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedEqual(y Int16x16, z Mask16x16) Mask16x16
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplyAdd231(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedEqual(y Int16x32, z Mask16x32) Mask16x32
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplySub132(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedEqual(y Int32x4, z Mask32x4) Mask32x4
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplySub213(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedEqual(y Int32x8, z Mask32x8) Mask32x8
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) FusedNegativeMultiplySub231(y Float32x4, z Float32x4) Float32x4
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedEqual(y Int32x16, z Mask32x16) Mask32x16
-// Add adds corresponding elements of two vectors.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedAdd(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedEqual(y Int64x2, z Mask64x2) Mask64x2
-// And performs a masked bitwise AND operation between two vectors.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VANDPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedAnd(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedEqual(y Int64x4, z Mask64x4) Mask64x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VANDNPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedAndNot(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedEqual(y Int64x8, z Mask64x8) Mask64x8
-// Div divides elements of two vectors.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiv(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedEqual(y Uint8x16, z Mask8x16) Mask8x16
// Equal compares for equality, masked.
// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedEqual(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedEqual(y Uint8x32, z Mask8x32) Mask8x32
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedGreater(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedEqual(y Uint8x64, z Mask8x64) Mask8x64
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedGreaterEqual(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedEqual(y Uint16x8, z Mask16x8) Mask16x8
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedIsNan(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedEqual(y Uint16x16, z Mask16x16) Mask16x16
-// Less compares for less than.
-// Const Immediate = 1.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedLess(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedEqual(y Uint16x32, z Mask16x32) Mask16x32
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedLessEqual(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedEqual(y Uint32x4, z Mask32x4) Mask32x4
-// Max computes the maximum of corresponding elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedMax(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedEqual(y Uint32x8, z Mask32x8) Mask32x8
-// Min computes the minimum of corresponding elements.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VMINPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedMin(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedEqual(y Uint32x16, z Mask32x16) Mask32x16
-// Mul multiplies corresponding elements of two vectors, masked.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VMULPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedMul(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedEqual(y Uint64x2, z Mask64x2) Mask64x2
-// MulByPowOf2 multiplies elements by a power of 2.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedMulByPowOf2(y Float32x4, z Mask32x4) Float32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedEqual(y Uint64x4, z Mask64x4) Mask64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Equal compares for equality, masked.
+// Const Immediate = 0.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedNotEqual(y Float32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedEqual(y Uint64x8, z Mask64x8) Mask64x8
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VORPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedOr(y Float32x4, z Mask32x4) Float32x4
+/* MaskedFloorSuppressExceptionWithPrecision */
-// Sub subtracts corresponding elements of two vectors.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedSub(y Float32x4, z Mask32x4) Float32x4
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VXORPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedXor(y Float32x4, z Mask32x4) Float32x4
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAdd132(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAdd213(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAdd231(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
+// Const Immediate = 9.
//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAddSub132(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAddSub213(y Float32x8, z Float32x8) Float32x8
+/* MaskedFloorWithPrecision */
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplyAddSub231(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySub132(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySub213(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySub231(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySubAdd132(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// FloorWithPrecision rounds elements down with specified precision, masked.
+// Const Immediate = 1.
//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySubAdd213(y Float32x8, z Float32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
-//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedMultiplySubAdd231(y Float32x8, z Float32x8) Float32x8
+/* MaskedFusedMultiplyAdd132 */
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplyAdd132(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplyAdd213(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplyAdd231(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplySub132(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplySub213(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) FusedNegativeMultiplySub231(y Float32x8, z Float32x8) Float32x8
+// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedAdd(y Float32x8, z Mask32x8) Float32x8
+/* MaskedFusedMultiplyAdd213 */
-// And performs a masked bitwise AND operation between two vectors.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VANDPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedAnd(y Float32x8, z Mask32x8) Float32x8
-
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VANDNPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedAndNot(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// Div divides elements of two vectors.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiv(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedEqual(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedGreater(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedGreaterEqual(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedIsNan(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedLess(y Float32x8, z Mask32x8) Mask32x8
+/* MaskedFusedMultiplyAdd231 */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedLessEqual(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// Max computes the maximum of corresponding elements.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedMax(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// Min computes the minimum of corresponding elements.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VMINPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedMin(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// Mul multiplies corresponding elements of two vectors, masked.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VMULPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedMul(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// MulByPowOf2 multiplies elements by a power of 2.
+// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedMulByPowOf2(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`.
//
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedNotEqual(y Float32x8, z Mask32x8) Mask32x8
+// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VORPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedOr(y Float32x8, z Mask32x8) Float32x8
+/* MaskedFusedMultiplyAddSub132 */
-// Sub subtracts corresponding elements of two vectors.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VADDPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedSub(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAddSub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VXORPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedXor(y Float32x8, z Mask32x8) Float32x8
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAddSub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAddSub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAddSub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAddSub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
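-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.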
//
// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub132(y Float64x2, z Float64x2) Float64x2
+func (x Float64x8) MaskedFusedMultiplyAddSub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+
+/* MaskedFusedMultiplyAddSub213 */
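-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.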
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAddSub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAddSub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub132(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAddSub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAddSub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAddSub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd132(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplyAddSub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+/* MaskedFusedMultiplyAddSub231 */
+
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplyAddSub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplyAddSub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplyAddSub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplyAddSub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplyAdd231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplyAddSub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplySub132(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplyAddSub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+/* MaskedFusedMultiplySub132 */
+
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplySub213(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplySub231(y Float64x2, z Float64x2) Float64x2
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// Add adds corresponding elements of two vectors.
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedAdd(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VANDPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedAnd(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VANDNPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedAndNot(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// Div divides elements of two vectors.
+// MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`.
//
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiv(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+/* MaskedFusedMultiplySub213 */
+
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedEqual(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedGreater(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedGreaterEqual(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedIsNan(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedLess(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedLessEqual(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedMax(y Float64x2, z Mask64x2) Float64x2
+/* MaskedFusedMultiplySub231 */
-// Min computes the minimum of corresponding elements.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VMINPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedMin(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// Mul multiplies corresponding elements of two vectors, masked.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VMULPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedMul(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// MulByPowOf2 multiplies elements by a power of 2.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedMulByPowOf2(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedNotEqual(y Float64x2, z Mask64x2) Mask64x2
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedOr(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedSub(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+/* MaskedFusedMultiplySubAdd132 */
+
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedXor(y Float64x2, z Mask64x2) Float64x2
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySubAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySubAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySubAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd231(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySubAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySubAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub213(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplySubAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+/* MaskedFusedMultiplySubAdd213 */
+
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub231(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySubAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySubAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub213(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySubAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub231(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySubAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySubAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
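-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.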
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd213(y Float64x4, z Float64x4) Float64x4
+func (x Float64x8) MaskedFusedMultiplySubAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+
+/* MaskedFusedMultiplySubAdd231 */
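-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.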
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd231(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedMultiplySubAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedMultiplySubAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedMultiplySubAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplyAdd231(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedMultiplySubAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplySub132(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedMultiplySubAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplySub213(y Float64x4, z Float64x4) Float64x4
+// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedMultiplySubAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
-//
-// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplySub231(y Float64x4, z Float64x4) Float64x4
+/* MaskedFusedNegativeMultiplyAdd132 */
-// Add adds corresponding elements of two vectors.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedAdd(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VANDPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedAnd(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VANDNPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedAndNot(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// Div divides elements of two vectors.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiv(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedNegativeMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedEqual(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedGreater(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+/* MaskedFusedNegativeMultiplyAdd213 */
+
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedGreaterEqual(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedIsNan(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedLess(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedLessEqual(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedNegativeMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Max computes the maximum of corresponding elements.
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedMax(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// Min computes the minimum of corresponding elements.
+// MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
//
-// Asm: VMINPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedMin(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// Mul multiplies corresponding elements of two vectors, masked.
+/* MaskedFusedNegativeMultiplyAdd231 */
+
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VMULPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedMul(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// MulByPowOf2 multiplies elements by a power of 2.
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedMulByPowOf2(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedNotEqual(y Float64x4, z Mask64x4) Mask64x4
+// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedOr(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedNegativeMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Sub subtracts corresponding elements of two vectors.
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedSub(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedXor(y Float64x4, z Mask64x4) Float64x4
+// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
-//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
+/* MaskedFusedNegativeMultiplySub132 */
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd231(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub132(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub213(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedNegativeMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub231(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub132(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
-//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub213(y Float64x8, z Float64x8) Float64x8
+/* MaskedFusedNegativeMultiplySub213 */
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub231(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd132(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd213(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd231(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFusedNegativeMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+/* MaskedFusedNegativeMultiplySub231 */
+
+// MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplyAdd231(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFusedNegativeMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplySub132(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFusedNegativeMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplySub213(y Float64x8, z Float64x8) Float64x8
+// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFusedNegativeMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplySub231(y Float64x8, z Float64x8) Float64x8
+func (x Float64x2) MaskedFusedNegativeMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
-// Add adds corresponding elements of two vectors.
+// MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedAdd(y Float64x8, z Mask64x8) Float64x8
+// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFusedNegativeMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
//
-// Asm: VANDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedAnd(y Float64x8, z Mask64x8) Float64x8
+// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFusedNegativeMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VANDNPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedAndNot(y Float64x8, z Mask64x8) Float64x8
+/* MaskedGreater */
-// Div divides elements of two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiv(y Float64x8, z Mask64x8) Float64x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedGreater(y Float32x4, z Mask32x4) Mask32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedEqual(y Float64x8, z Mask64x8) Mask64x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedGreater(y Float32x8, z Mask32x8) Mask32x8
// Greater compares for greater than.
// Const Immediate = 6.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedGreater(y Float64x8, z Mask64x8) Mask64x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedGreater(y Float32x16, z Mask32x16) Mask32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedGreaterEqual(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x2) MaskedGreater(y Float64x2, z Mask64x2) Mask64x2
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-// Const Immediate = 3.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedIsNan(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x4) MaskedGreater(y Float64x4, z Mask64x4) Mask64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedLess(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) MaskedGreater(y Float64x8, z Mask64x8) Mask64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedLessEqual(y Float64x8, z Mask64x8) Mask64x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedGreater(y Int8x16, z Mask8x16) Mask8x16
-// Max computes the maximum of corresponding elements.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedMax(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedGreater(y Int8x32, z Mask8x32) Mask8x32
-// Min computes the minimum of corresponding elements.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VMINPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedMin(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedGreater(y Int8x64, z Mask8x64) Mask8x64
-// Mul multiplies corresponding elements of two vectors, masked.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VMULPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedMul(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedGreater(y Int16x8, z Mask16x8) Mask16x8
-// MulByPowOf2 multiplies elements by a power of 2.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedMulByPowOf2(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedGreater(y Int16x16, z Mask16x16) Mask16x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedNotEqual(y Float64x8, z Mask64x8) Mask64x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedGreater(y Int16x32, z Mask16x32) Mask16x32
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedOr(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedGreater(y Int32x4, z Mask32x4) Mask32x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VADDPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedSub(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedGreater(y Int32x8, z Mask32x8) Mask32x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedXor(y Float64x8, z Mask64x8) Float64x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedGreater(y Int32x16, z Mask32x16) Mask32x16
-// Add adds corresponding elements of two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedAdd(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedGreater(y Int64x2, z Mask64x2) Mask64x2
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedEqual(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedGreater(y Int64x4, z Mask64x4) Mask64x4
// Greater compares for greater than.
// Const Immediate = 6.
//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedGreater(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedGreater(y Int64x8, z Mask64x8) Mask64x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedGreaterEqual(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedGreater(y Uint8x16, z Mask8x16) Mask8x16
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedLess(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedGreater(y Uint8x32, z Mask8x32) Mask8x32
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedLessEqual(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedGreater(y Uint8x64, z Mask8x64) Mask8x64
-// Max computes the maximum of corresponding elements.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMax(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedGreater(y Uint16x8, z Mask16x8) Mask16x8
-// Min computes the minimum of corresponding elements.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMin(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedGreater(y Uint16x16, z Mask16x16) Mask16x16
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMulHigh(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedGreater(y Uint16x32, z Mask16x32) Mask16x32
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedGreater(y Uint32x4, z Mask32x4) Mask32x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedGreater(y Uint32x8, z Mask32x8) Mask32x8
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedGreater(y Uint32x16, z Mask32x16) Mask32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedGreater(y Uint64x2, z Mask64x2) Mask64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSaturatedSub(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedGreater(y Uint64x4, z Mask64x4) Mask64x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedGreater compares for greater than.
+// Const Immediate = 6.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSub(y Int16x16, z Mask16x16) Int16x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedGreater(y Uint64x8, z Mask64x8) Mask64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedAdd(y Int16x32, z Mask16x32) Int16x32
+/* MaskedGreaterEqual */
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedEqual(y Int16x32, z Mask16x32) Mask16x32
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedGreaterEqual(y Float32x4, z Mask32x4) Mask32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedGreater(y Int16x32, z Mask16x32) Mask16x32
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedGreaterEqual(y Float32x8, z Mask32x8) Mask32x8
// GreaterEqual compares for greater than or equal.
// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedGreaterEqual(y Int16x32, z Mask16x32) Mask16x32
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedGreaterEqual(y Float32x16, z Mask32x16) Mask32x16
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedLess(y Int16x32, z Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedGreaterEqual(y Float64x2, z Mask64x2) Mask64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedLessEqual(y Int16x32, z Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedGreaterEqual(y Float64x4, z Mask64x4) Mask64x4
-// Max computes the maximum of corresponding elements.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMax(y Int16x32, z Mask16x32) Int16x32
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedGreaterEqual(y Float64x8, z Mask64x8) Mask64x8
-// Min computes the minimum of corresponding elements.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMin(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedGreaterEqual(y Int8x16, z Mask8x16) Mask8x16
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMulHigh(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedGreaterEqual(y Int8x32, z Mask8x32) Mask8x32
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedGreaterEqual(y Int8x64, z Mask8x64) Mask8x64
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x8) MaskedGreaterEqual(y Int16x8, z Mask16x8) Mask16x8
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedGreaterEqual(y Int16x16, z Mask16x16) Mask16x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedGreaterEqual(y Int16x32, z Mask16x32) Mask16x32
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSaturatedSub(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedGreaterEqual(y Int32x4, z Mask32x4) Mask32x4
-// Sub subtracts corresponding elements of two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSub(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedGreaterEqual(y Int32x8, z Mask32x8) Mask32x8
-// Add adds corresponding elements of two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedAdd(y Int16x8, z Mask16x8) Int16x8
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedGreaterEqual(y Int32x16, z Mask32x16) Mask32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedEqual(y Int16x8, z Mask16x8) Mask16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedGreaterEqual(y Int64x2, z Mask64x2) Mask64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedGreater(y Int16x8, z Mask16x8) Mask16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedGreaterEqual(y Int64x4, z Mask64x4) Mask64x4
// GreaterEqual compares for greater than or equal.
// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedGreaterEqual(y Int16x8, z Mask16x8) Mask16x8
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedGreaterEqual(y Int64x8, z Mask64x8) Mask64x8
-// Less compares for less than.
-// Const Immediate = 1.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedLess(y Int16x8, z Mask16x8) Mask16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedGreaterEqual(y Uint8x16, z Mask8x16) Mask8x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedLessEqual(y Int16x8, z Mask16x8) Mask16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedGreaterEqual(y Uint8x32, z Mask8x32) Mask8x32
-// Max computes the maximum of corresponding elements.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMax(y Int16x8, z Mask16x8) Int16x8
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedGreaterEqual(y Uint8x64, z Mask8x64) Mask8x64
-// Min computes the minimum of corresponding elements.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMin(y Int16x8, z Mask16x8) Int16x8
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedGreaterEqual(y Uint16x8, z Mask16x8) Mask16x8
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMulHigh(y Int16x8, z Mask16x8) Int16x8
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSaturatedSub(y Int16x8, z Mask16x8) Int16x8
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedGreaterEqual(y Uint16x16, z Mask16x16) Mask16x16
-// Sub subtracts corresponding elements of two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSub(y Int16x8, z Mask16x8) Int16x8
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedGreaterEqual(y Uint16x32, z Mask16x32) Mask16x32
-// Add adds corresponding elements of two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAdd(y Int32x16, z Mask32x16) Int32x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedGreaterEqual(y Uint32x4, z Mask32x4) Mask32x4
-// And performs a masked bitwise AND operation between two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAnd(y Int32x16, z Mask32x16) Int32x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedGreaterEqual(y Uint32x8, z Mask32x8) Mask32x8
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAndNot(y Int32x16, z Mask32x16) Int32x16
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedGreaterEqual(y Uint32x16, z Mask32x16) Mask32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedEqual(y Int32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedGreaterEqual(y Uint64x2, z Mask64x2) Mask64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MaskedGreaterEqual compares for greater than or equal.
+// Const Immediate = 5.
//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedGreater(y Int32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedGreaterEqual(y Uint64x4, z Mask64x4) Mask64x4
// GreaterEqual compares for greater than or equal.
// Const Immediate = 5.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedGreaterEqual(y Int32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedGreaterEqual(y Uint64x8, z Mask64x8) Mask64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedLess(y Int32x16, z Mask32x16) Mask32x16
+/* MaskedIsNan */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedLessEqual(y Int32x16, z Mask32x16) Mask32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedIsNan(y Float32x4, z Mask32x4) Mask32x4
-// Max computes the maximum of corresponding elements.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMax(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedIsNan(y Float32x8, z Mask32x8) Mask32x8
-// Min computes the minimum of corresponding elements.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMin(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedIsNan(y Float32x16, z Mask32x16) Mask32x16
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMulLow(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedIsNan(y Float64x2, z Mask64x2) Mask64x2
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedNotEqual(y Int32x16, z Mask32x16) Mask32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedIsNan(y Float64x4, z Mask64x4) Mask64x4
-// Or performs a masked bitwise OR operation between two vectors.
+// MaskedIsNan checks if elements are NaN. Use as x.MaskedIsNan(x, mask).
+// Const Immediate = 3.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedOr(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedIsNan(y Float64x8, z Mask64x8) Mask64x8
-// Sub subtracts corresponding elements of two vectors.
+/* MaskedLess */
+
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedLess(y Float32x4, z Mask32x4) Mask32x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedLess(y Float32x8, z Mask32x8) Mask32x8
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
-func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedLess(y Float32x16, z Mask32x16) Mask32x16
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedLess(y Float64x2, z Mask64x2) Mask64x2
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedLess(y Float64x4, z Mask64x4) Mask64x4
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedLess(y Float64x8, z Mask64x8) Mask64x8
-// Add adds corresponding elements of two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAdd(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedLess(y Int8x16, z Mask8x16) Mask8x16
-// And performs a masked bitwise AND operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAnd(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedLess(y Int8x32, z Mask8x32) Mask8x32
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAndNot(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedLess(y Int8x64, z Mask8x64) Mask8x64
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedEqual(y Int32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedLess(y Int16x8, z Mask16x8) Mask16x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedGreater(y Int32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedLess(y Int16x16, z Mask16x16) Mask16x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedGreaterEqual(y Int32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedLess(y Int16x32, z Mask16x32) Mask16x32
// Less compares for less than.
// Const Immediate = 1.
//
// Asm: VPCMPD, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedLess(y Int32x4, z Mask32x4) Mask32x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Less compares for less than.
+// Const Immediate = 1.
//
// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedLessEqual(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x8) MaskedLess(y Int32x8, z Mask32x8) Mask32x8
-// Max computes the maximum of corresponding elements.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMax(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedLess(y Int32x16, z Mask32x16) Mask32x16
-// Min computes the minimum of corresponding elements.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMin(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedLess(y Int64x2, z Mask64x2) Mask64x2
-// MulLow multiplies elements and stores the low part of the result, masked.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMulLow(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedLess(y Int64x4, z Mask64x4) Mask64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedNotEqual(y Int32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedLess(y Int64x8, z Mask64x8) Mask64x8
-// Or performs a masked bitwise OR operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedOr(y Int32x4, z Mask32x4) Int32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedLess(y Uint8x16, z Mask8x16) Mask8x16
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
-
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
-func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedLess(y Uint8x32, z Mask8x32) Mask8x32
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
-func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedLess(y Uint8x64, z Mask8x64) Mask8x64
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedLess(y Uint16x8, z Mask16x8) Mask16x8
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedLess(y Uint16x16, z Mask16x16) Mask16x16
-// Add adds corresponding elements of two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAdd(y Int32x8, z Mask32x8) Int32x8
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedLess(y Uint16x32, z Mask16x32) Mask16x32
-// And performs a masked bitwise AND operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAnd(y Int32x8, z Mask32x8) Int32x8
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedLess(y Uint32x4, z Mask32x4) Mask32x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAndNot(y Int32x8, z Mask32x8) Int32x8
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedLess(y Uint32x8, z Mask32x8) Mask32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedEqual(y Int32x8, z Mask32x8) Mask32x8
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedLess(y Uint32x16, z Mask32x16) Mask32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedGreater(y Int32x8, z Mask32x8) Mask32x8
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedLess(y Uint64x2, z Mask64x2) Mask64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Less compares for less than.
+// Const Immediate = 1.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedGreaterEqual(y Int32x8, z Mask32x8) Mask32x8
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedLess(y Uint64x4, z Mask64x4) Mask64x4
// Less compares for less than.
// Const Immediate = 1.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedLess(y Int32x8, z Mask32x8) Mask32x8
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedLess(y Uint64x8, z Mask64x8) Mask64x8
+
+/* MaskedLessEqual */
// LessEqual compares for less than or equal.
// Const Immediate = 2.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedLessEqual(y Int32x8, z Mask32x8) Mask32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMax(y Int32x8, z Mask32x8) Int32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMin(y Int32x8, z Mask32x8) Int32x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedLessEqual(y Float32x4, z Mask32x4) Mask32x4
-// MulLow multiplies elements and stores the low part of the result, masked.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMulLow(y Int32x8, z Mask32x8) Int32x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedLessEqual(y Float32x8, z Mask32x8) Mask32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedNotEqual(y Int32x8, z Mask32x8) Mask32x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedLessEqual(y Float32x16, z Mask32x16) Mask32x16
-// Or performs a masked bitwise OR operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedOr(y Int32x8, z Mask32x8) Int32x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedLessEqual(y Float64x2, z Mask64x2) Mask64x2
-// Sub subtracts corresponding elements of two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedLessEqual(y Float64x4, z Mask64x4) Mask64x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedLessEqual(y Float64x8, z Mask64x8) Mask64x8
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
-func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedLessEqual(y Int8x16, z Mask8x16) Mask8x16
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
-func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedLessEqual(y Int8x32, z Mask8x32) Mask8x32
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedLessEqual(y Int8x64, z Mask8x64) Mask8x64
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedLessEqual(y Int16x8, z Mask16x8) Mask16x8
-// Add adds corresponding elements of two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAdd(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedLessEqual(y Int16x16, z Mask16x16) Mask16x16
-// And performs a masked bitwise AND operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAnd(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedLessEqual(y Int16x32, z Mask16x32) Mask16x32
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAndNot(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedLessEqual(y Int32x4, z Mask32x4) Mask32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedEqual(y Int64x2, z Mask64x2) Mask64x2
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedLessEqual(y Int32x8, z Mask32x8) Mask32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedGreater(y Int64x2, z Mask64x2) Mask64x2
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedLessEqual(y Int32x16, z Mask32x16) Mask32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedGreaterEqual(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) MaskedLessEqual(y Int64x2, z Mask64x2) Mask64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedLess(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x4) MaskedLessEqual(y Int64x4, z Mask64x4) Mask64x4
// LessEqual compares for less than or equal.
// Const Immediate = 2.
//
// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedLessEqual(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x8) MaskedLessEqual(y Int64x8, z Mask64x8) Mask64x8
-// Max computes the maximum of corresponding elements.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedMax(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedLessEqual(y Uint8x16, z Mask8x16) Mask8x16
-// Min computes the minimum of corresponding elements.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedMin(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedLessEqual(y Uint8x32, z Mask8x32) Mask8x32
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedMulEvenWiden(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedLessEqual(y Uint8x64, z Mask8x64) Mask8x64
-// MulLow multiplies elements and stores the low part of the result, masked.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedMulLow(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedLessEqual(y Uint16x8, z Mask16x8) Mask16x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedNotEqual(y Int64x2, z Mask64x2) Mask64x2
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedLessEqual(y Uint16x16, z Mask16x16) Mask16x16
-// Or performs a masked bitwise OR operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedOr(y Int64x2, z Mask64x2) Int64x2
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedLessEqual(y Uint16x32, z Mask16x32) Mask16x32
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedSub(y Int64x2, z Mask64x2) Int64x2
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedXor(y Int64x2, z Mask64x2) Int64x2
-
-// Add adds corresponding elements of two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedAdd(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedLessEqual(y Uint32x4, z Mask32x4) Mask32x4
-// And performs a masked bitwise AND operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedAnd(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedLessEqual(y Uint32x8, z Mask32x8) Mask32x8
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedAndNot(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedLessEqual(y Uint32x16, z Mask32x16) Mask32x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedEqual(y Int64x4, z Mask64x4) Mask64x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedLessEqual(y Uint64x2, z Mask64x2) Mask64x2
-// Greater compares for greater than.
-// Const Immediate = 6.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedGreater(y Int64x4, z Mask64x4) Mask64x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedLessEqual(y Uint64x4, z Mask64x4) Mask64x4
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// LessEqual compares for less than or equal.
+// Const Immediate = 2.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedGreaterEqual(y Int64x4, z Mask64x4) Mask64x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedLessEqual(y Uint64x8, z Mask64x8) Mask64x8
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedLess(y Int64x4, z Mask64x4) Mask64x4
+/* MaskedMax */
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedLessEqual(y Int64x4, z Mask64x4) Mask64x4
+// Asm: VMAXPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedMax(y Float32x4, z Mask32x4) Float32x4
// Max computes the maximum of corresponding elements.
//
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedMax(y Int64x4, z Mask64x4) Int64x4
+// Asm: VMAXPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedMax(y Float32x8, z Mask32x8) Float32x8
-// Min computes the minimum of corresponding elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedMin(y Int64x4, z Mask64x4) Int64x4
+// Asm: VMAXPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedMax(y Float32x16, z Mask32x16) Float32x16
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedMulEvenWiden(y Int64x4, z Mask64x4) Int64x4
+// Asm: VMAXPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedMax(y Float64x2, z Mask64x2) Float64x2
-// MulLow multiplies elements and stores the low part of the result, masked.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedMulLow(y Int64x4, z Mask64x4) Int64x4
+// Asm: VMAXPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedMax(y Float64x4, z Mask64x4) Float64x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedNotEqual(y Int64x4, z Mask64x4) Mask64x4
+// Asm: VMAXPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedMax(y Float64x8, z Mask64x8) Float64x8
-// Or performs a masked bitwise OR operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedOr(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedMax(y Int8x16, z Mask8x16) Int8x16
-// Sub subtracts corresponding elements of two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedSub(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedMax(y Int8x32, z Mask8x32) Int8x32
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Int64x4) MaskedXor(y Int64x4, z Mask64x4) Int64x4
+// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedMax(y Int8x64, z Mask8x64) Int8x64
-// Add adds corresponding elements of two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedAdd(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedMax(y Int16x8, z Mask16x8) Int16x8
-// And performs a masked bitwise AND operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedAnd(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedMax(y Int16x16, z Mask16x16) Int16x16
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedAndNot(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedMax(y Int16x32, z Mask16x32) Int16x32
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedEqual(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedMax(y Int32x4, z Mask32x4) Int32x4
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedGreater(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedMax(y Int32x8, z Mask32x8) Int32x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedGreaterEqual(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedMax(y Int32x16, z Mask32x16) Int32x16
-// Less compares for less than.
-// Const Immediate = 1.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedLess(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedMax(y Int64x2, z Mask64x2) Int64x2
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedLessEqual(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedMax(y Int64x4, z Mask64x4) Int64x4
// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
func (x Int64x8) MaskedMax(y Int64x8, z Mask64x8) Int64x8
-// Min computes the minimum of corresponding elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedMin(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedMax(y Uint8x16, z Mask8x16) Uint8x16
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedMulEvenWiden(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedMax(y Uint8x32, z Mask8x32) Uint8x32
-// MulLow multiplies elements and stores the low part of the result, masked.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedMulLow(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedMax(y Uint8x64, z Mask8x64) Uint8x64
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedNotEqual(y Int64x8, z Mask64x8) Mask64x8
+// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedMax(y Uint16x8, z Mask16x8) Uint16x8
-// Or performs a masked bitwise OR operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedOr(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedMax(y Uint16x16, z Mask16x16) Uint16x16
-// Sub subtracts corresponding elements of two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedSub(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedMax(y Uint16x32, z Mask16x32) Uint16x32
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Int64x8) MaskedXor(y Int64x8, z Mask64x8) Int64x8
+// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedMax(y Uint32x4, z Mask32x4) Uint32x4
-// Add adds corresponding elements of two vectors.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedAdd(y Int8x16, z Mask8x16) Int8x16
+// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedMax(y Uint32x8, z Mask32x8) Uint32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPEQB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedEqual(y Int8x16, z Mask8x16) Mask8x16
+// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedMax(y Uint32x16, z Mask32x16) Uint32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPGTB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedGreater(y Int8x16, z Mask8x16) Mask8x16
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedMax(y Uint64x2, z Mask64x2) Uint64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedGreaterEqual(y Int8x16, z Mask8x16) Mask8x16
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedMax(y Uint64x4, z Mask64x4) Uint64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedLess(y Int8x16, z Mask8x16) Mask8x16
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedMax(y Uint64x8, z Mask64x8) Uint64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedLessEqual(y Int8x16, z Mask8x16) Mask8x16
+/* MaskedMin */
-// Max computes the maximum of corresponding elements.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedMax(y Int8x16, z Mask8x16) Int8x16
+// Asm: VMINPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedMin(y Float32x4, z Mask32x4) Float32x4
// Min computes the minimum of corresponding elements.
//
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedMin(y Int8x16, z Mask8x16) Int8x16
+// Asm: VMINPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedMin(y Float32x8, z Mask32x8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedNotEqual(y Int8x16, z Mask8x16) Mask8x16
+// Asm: VMINPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedMin(y Float32x16, z Mask32x16) Float32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedSaturatedAdd(y Int8x16, z Mask8x16) Int8x16
+// Asm: VMINPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedMin(y Float64x2, z Mask64x2) Float64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedSaturatedSub(y Int8x16, z Mask8x16) Int8x16
+// Asm: VMINPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedMin(y Float64x4, z Mask64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Int8x16) MaskedSub(y Int8x16, z Mask8x16) Int8x16
+// Asm: VMINPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedMin(y Float64x8, z Mask64x8) Float64x8
-// Add adds corresponding elements of two vectors.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedAdd(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedMin(y Int8x16, z Mask8x16) Int8x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPEQB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedEqual(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedMin(y Int8x32, z Mask8x32) Int8x32
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPGTB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedGreater(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedMin(y Int8x64, z Mask8x64) Int8x64
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedGreaterEqual(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedMin(y Int16x8, z Mask16x8) Int16x8
-// Less compares for less than.
-// Const Immediate = 1.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedLess(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedMin(y Int16x16, z Mask16x16) Int16x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedLessEqual(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedMin(y Int16x32, z Mask16x32) Int16x32
-// Max computes the maximum of corresponding elements.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedMax(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedMin(y Int32x4, z Mask32x4) Int32x4
// Min computes the minimum of corresponding elements.
//
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedMin(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedMin(y Int32x8, z Mask32x8) Int32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedNotEqual(y Int8x32, z Mask8x32) Mask8x32
+// Asm: VPMINSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedMin(y Int32x16, z Mask32x16) Int32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedSaturatedAdd(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedMin(y Int64x2, z Mask64x2) Int64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedSaturatedSub(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedMin(y Int64x4, z Mask64x4) Int64x4
-// Sub subtracts corresponding elements of two vectors.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Int8x32) MaskedSub(y Int8x32, z Mask8x32) Int8x32
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedMin(y Int64x8, z Mask64x8) Int64x8
-// Add adds corresponding elements of two vectors.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedAdd(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedMin(y Uint8x16, z Mask8x16) Uint8x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPEQB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedEqual(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedMin(y Uint8x32, z Mask8x32) Uint8x32
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPGTB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedGreater(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedMin(y Uint8x64, z Mask8x64) Uint8x64
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedGreaterEqual(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedMin(y Uint16x8, z Mask16x8) Uint16x8
-// Less compares for less than.
-// Const Immediate = 1.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedLess(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedMin(y Uint16x16, z Mask16x16) Uint16x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedLessEqual(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedMin(y Uint16x32, z Mask16x32) Uint16x32
-// Max computes the maximum of corresponding elements.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedMax(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedMin(y Uint32x4, z Mask32x4) Uint32x4
// Min computes the minimum of corresponding elements.
//
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedMin(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedMin(y Uint32x8, z Mask32x8) Uint32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedNotEqual(y Int8x64, z Mask8x64) Mask8x64
+// Asm: VPMINUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedMin(y Uint32x16, z Mask32x16) Uint32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedSaturatedAdd(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedMin(y Uint64x2, z Mask64x2) Uint64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedSaturatedSub(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedMin(y Uint64x4, z Mask64x4) Uint64x4
-// Sub subtracts corresponding elements of two vectors.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Int8x64) MaskedSub(y Int8x64, z Mask8x64) Int8x64
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedMin(y Uint64x8, z Mask64x8) Uint64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedAdd(y Uint16x16, z Mask16x16) Uint16x16
+/* MaskedMul */
-// Average computes the rounded average of corresponding elements.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedAverage(y Uint16x16, z Mask16x16) Uint16x16
+// Asm: VMULPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedMul(y Float32x4, z Mask32x4) Float32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedEqual(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VMULPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedMul(y Float32x8, z Mask32x8) Float32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedGreater(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VMULPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedMul(y Float32x16, z Mask32x16) Float32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedGreaterEqual(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VMULPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedMul(y Float64x2, z Mask64x2) Float64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedLess(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VMULPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedMul(y Float64x4, z Mask64x4) Float64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Mul multiplies corresponding elements of two vectors, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedLessEqual(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VMULPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedMul(y Float64x8, z Mask64x8) Float64x8
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMax(y Uint16x16, z Mask16x16) Uint16x16
+/* MaskedMulByPowOf2 */
-// Min computes the minimum of corresponding elements.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMin(y Uint16x16, z Mask16x16) Uint16x16
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedMulByPowOf2(y Float32x4, z Mask32x4) Float32x4
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMulHigh(y Uint16x16, z Mask16x16) Uint16x16
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedMulByPowOf2(y Float32x8, z Mask32x8) Float32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedNotEqual(y Uint16x16, z Mask16x16) Mask16x16
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedMulByPowOf2(y Float32x16, z Mask32x16) Float32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedMulByPowOf2(y Float64x2, z Mask64x2) Float64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedMulByPowOf2(y Float64x4, z Mask64x4) Float64x4
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedMulByPowOf2(y Float64x8, z Mask64x8) Float64x8
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16
+/* MaskedMulEvenWiden */
-// Add adds corresponding elements of two vectors.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedAdd(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedMulEvenWiden(y Int64x2, z Mask64x2) Int64x2
-// Average computes the rounded average of corresponding elements.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedAverage(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedMulEvenWiden(y Int64x4, z Mask64x4) Int64x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedEqual(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedMulEvenWiden(y Int64x8, z Mask64x8) Int64x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedGreater(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedMulEvenWiden(y Uint64x2, z Mask64x2) Uint64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedGreaterEqual(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedMulEvenWiden(y Uint64x4, z Mask64x4) Uint64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedLess(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedMulEvenWiden(y Uint64x8, z Mask64x8) Uint64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+/* MaskedMulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedLessEqual(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULHW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedMulHigh(y Int16x8, z Mask16x8) Int16x8
-// Max computes the maximum of corresponding elements.
+// MulHigh multiplies elements and stores the high part of the result, masked.
//
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedMax(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULHW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedMulHigh(y Int16x16, z Mask16x16) Int16x16
-// Min computes the minimum of corresponding elements.
+// MulHigh multiplies elements and stores the high part of the result, masked.
//
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedMin(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULHW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedMulHigh(y Int16x32, z Mask16x32) Int16x32
// MulHigh multiplies elements and stores the high part of the result, masked.
//
// Asm: VPMULHUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedMulHigh(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x8) MaskedMulHigh(y Uint16x8, z Mask16x8) Uint16x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// MulHigh multiplies elements and stores the high part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedNotEqual(y Uint16x32, z Mask16x32) Mask16x32
+// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedMulHigh(y Uint16x16, z Mask16x16) Uint16x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// MulHigh multiplies elements and stores the high part of the result, masked.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedMulHigh(y Uint16x32, z Mask16x32) Uint16x32
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
+/* MaskedMulLow */
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
+// Asm: VPMULLW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8
-// Sub subtracts corresponding elements of two vectors.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32
+// Asm: VPMULLW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16
-// Add adds corresponding elements of two vectors.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedAdd(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VPMULLW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32
-// Average computes the rounded average of corresponding elements.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedAverage(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VPMULLD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedMulLow(y Int32x4, z Mask32x4) Int32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedEqual(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VPMULLD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedMulLow(y Int32x8, z Mask32x8) Int32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedGreater(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VPMULLD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedMulLow(y Int32x16, z Mask32x16) Int32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedGreaterEqual(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedMulLow(y Int64x2, z Mask64x2) Int64x2
-// Less compares for less than.
-// Const Immediate = 1.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedLess(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedMulLow(y Int64x4, z Mask64x4) Int64x4
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// MulLow multiplies elements and stores the low part of the result, masked.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedLessEqual(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedMulLow(y Int64x8, z Mask64x8) Int64x8
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedMax(y Uint16x8, z Mask16x8) Uint16x8
+/* MaskedNotEqual */
-// Min computes the minimum of corresponding elements.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedMin(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedNotEqual(y Float32x4, z Mask32x4) Mask32x4
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedMulHigh(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedNotEqual(y Float32x8, z Mask32x8) Mask32x8
// NotEqual compares for inequality.
// Const Immediate = 4.
//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedNotEqual(y Uint16x8, z Mask16x8) Mask16x8
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedNotEqual(y Float32x16, z Mask32x16) Mask32x16
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedNotEqual(y Float64x2, z Mask64x2) Mask64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedNotEqual(y Float64x4, z Mask64x4) Mask64x4
-// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedNotEqual(y Float64x8, z Mask64x8) Mask64x8
-// Sub subtracts corresponding elements of two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedNotEqual(y Int8x16, z Mask8x16) Mask8x16
-// Add adds corresponding elements of two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedAdd(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedNotEqual(y Int8x32, z Mask8x32) Mask8x32
-// And performs a masked bitwise AND operation between two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedAnd(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedNotEqual(y Int8x64, z Mask8x64) Mask8x64
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedAndNot(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedEqual(y Uint32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedGreater(y Uint32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedGreaterEqual(y Uint32x16, z Mask32x16) Mask32x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedNotEqual(y Int32x4, z Mask32x4) Mask32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedLess(y Uint32x16, z Mask32x16) Mask32x16
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedLessEqual(y Uint32x16, z Mask32x16) Mask32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedMax(y Uint32x16, z Mask32x16) Uint32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedMin(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedNotEqual(y Int32x8, z Mask32x8) Mask32x8
// NotEqual compares for inequality.
// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedNotEqual(y Uint32x16, z Mask32x16) Mask32x16
-
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedOr(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedNotEqual(y Int32x16, z Mask32x16) Mask32x16
-// Sub subtracts corresponding elements of two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedNotEqual(y Int64x2, z Mask64x2) Mask64x2
-// Xor performs a masked bitwise XOR operation between two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedNotEqual(y Int64x4, z Mask64x4) Mask64x4
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedNotEqual(y Int64x8, z Mask64x8) Mask64x8
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedNotEqual(y Uint8x16, z Mask8x16) Mask8x16
-// Add adds corresponding elements of two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedAdd(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedNotEqual(y Uint8x32, z Mask8x32) Mask8x32
-// And performs a masked bitwise AND operation between two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedAnd(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedNotEqual(y Uint8x64, z Mask8x64) Mask8x64
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedAndNot(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedNotEqual(y Uint16x8, z Mask16x8) Mask16x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedEqual(y Uint32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedNotEqual(y Uint16x16, z Mask16x16) Mask16x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedGreater(y Uint32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedNotEqual(y Uint16x32, z Mask16x32) Mask16x32
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedGreaterEqual(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) MaskedNotEqual(y Uint32x4, z Mask32x4) Mask32x4
-// Less compares for less than.
-// Const Immediate = 1.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedLess(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x8) MaskedNotEqual(y Uint32x8, z Mask32x8) Mask32x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedLessEqual(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x16) MaskedNotEqual(y Uint32x16, z Mask32x16) Mask32x16
-// Max computes the maximum of corresponding elements.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedMax(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedNotEqual(y Uint64x2, z Mask64x2) Mask64x2
-// Min computes the minimum of corresponding elements.
+// NotEqual compares for inequality.
+// Const Immediate = 4.
//
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedMin(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedNotEqual(y Uint64x4, z Mask64x4) Mask64x4
// NotEqual compares for inequality.
// Const Immediate = 4.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedNotEqual(y Uint32x4, z Mask32x4) Mask32x4
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedNotEqual(y Uint64x8, z Mask64x8) Mask64x8
+
+/* MaskedOr */
// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedOr(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedOr(y Float32x4, z Mask32x4) Float32x4
-// Sub subtracts corresponding elements of two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedOr(y Float32x8, z Mask32x8) Float32x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedOr(y Float32x16, z Mask32x16) Float32x16
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+// Asm: VORPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedOr(y Float64x2, z Mask64x2) Float64x2
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+// Asm: VORPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedOr(y Float64x4, z Mask64x4) Float64x4
-// Add adds corresponding elements of two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedAdd(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedOr(y Float64x8, z Mask64x8) Float64x8
-// And performs a masked bitwise AND operation between two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedAnd(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedOr(y Int32x4, z Mask32x4) Int32x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedAndNot(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedOr(y Int32x8, z Mask32x8) Int32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedEqual(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedOr(y Int32x16, z Mask32x16) Int32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedGreater(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedOr(y Int64x2, z Mask64x2) Int64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedGreaterEqual(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedOr(y Int64x4, z Mask64x4) Int64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedLess(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedOr(y Int64x8, z Mask64x8) Int64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedLessEqual(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedOr(y Uint32x4, z Mask32x4) Uint32x4
-// Max computes the maximum of corresponding elements.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedMax(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedOr(y Uint32x8, z Mask32x8) Uint32x8
-// Min computes the minimum of corresponding elements.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedMin(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedOr(y Uint32x16, z Mask32x16) Uint32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedNotEqual(y Uint32x8, z Mask32x8) Mask32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedOr(y Uint64x2, z Mask64x2) Uint64x2
// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedOr(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedOr(y Uint64x4, z Mask64x4) Uint64x4
-// Sub subtracts corresponding elements of two vectors.
+// Or performs a masked bitwise OR operation between two vectors.
//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedOr(y Uint64x8, z Mask64x8) Uint64x8
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8
+/* MaskedPairDotProd */
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+//
+// NOTE(review): the mask z is a Mask16x8, sized for the 16-bit inputs, while
+// the result is an Int32x4; confirm the mask is meant to match the input
+// lanes rather than the result lanes.
+func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+//
+// NOTE(review): the mask z is a Mask16x16, sized for the 16-bit inputs, while
+// the result is an Int32x8; confirm the mask is meant to match the input
+// lanes rather than the result lanes.
+func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
-// Add adds corresponding elements of two vectors.
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedAdd(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+//
+// NOTE(review): the mask z is a Mask16x32, sized for the 16-bit inputs, while
+// the result is an Int32x16; confirm the mask is meant to match the input
+// lanes rather than the result lanes.
+func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
-// And performs a masked bitwise AND operation between two vectors.
-//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedAnd(y Uint64x2, z Mask64x2) Uint64x2
+/* MaskedPairDotProdAccumulate */
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedAndNot(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedEqual(y Uint64x2, z Mask64x2) Mask64x2
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
-// Greater compares for greater than.
-// Const Immediate = 6.
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedGreater(y Uint64x2, z Mask64x2) Mask64x2
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedGreaterEqual(y Uint64x2, z Mask64x2) Mask64x2
+/* MaskedPopCount */
-// Less compares for less than.
-// Const Immediate = 1.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedLess(y Uint64x2, z Mask64x2) Mask64x2
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedPopCount(y Mask8x16) Int8x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedLessEqual(y Uint64x2, z Mask64x2) Mask64x2
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedPopCount(y Mask8x32) Int8x32
-// Max computes the maximum of corresponding elements.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedMax(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedPopCount(y Mask8x64) Int8x64
-// Min computes the minimum of corresponding elements.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedMin(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedPopCount(y Mask16x8) Int16x8
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedMulEvenWiden(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedPopCount(y Mask16x16) Int16x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedNotEqual(y Uint64x2, z Mask64x2) Mask64x2
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedPopCount(y Mask16x32) Int16x32
-// Or performs a masked bitwise OR operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedOr(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedPopCount(y Mask32x4) Int32x4
-// Sub subtracts corresponding elements of two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedSub(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedPopCount(y Mask32x8) Int32x8
-// Xor performs a masked bitwise XOR operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) MaskedXor(y Uint64x2, z Mask64x2) Uint64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedPopCount(y Mask32x16) Int32x16
-// Add adds corresponding elements of two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedAdd(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedPopCount(y Mask64x2) Int64x2
-// And performs a masked bitwise AND operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedAnd(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedPopCount(y Mask64x4) Int64x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedAndNot(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedPopCount(y Mask64x8) Int64x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedEqual(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedPopCount(y Mask8x16) Uint8x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedGreater(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedPopCount(y Mask8x32) Uint8x32
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedGreaterEqual(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedPopCount(y Mask8x64) Uint8x64
-// Less compares for less than.
-// Const Immediate = 1.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedLess(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedPopCount(y Mask16x8) Uint16x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedLessEqual(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedPopCount(y Mask16x16) Uint16x16
-// Max computes the maximum of corresponding elements.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedMax(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedPopCount(y Mask16x32) Uint16x32
-// Min computes the minimum of corresponding elements.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedMin(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedPopCount(y Mask32x4) Uint32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedMulEvenWiden(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedPopCount(y Mask32x8) Uint32x8
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedNotEqual(y Uint64x4, z Mask64x4) Mask64x4
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedPopCount(y Mask32x16) Uint32x16
-// Or performs a masked bitwise OR operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedOr(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedPopCount(y Mask64x2) Uint64x2
-// Sub subtracts corresponding elements of two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedSub(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4
-// Xor performs a masked bitwise XOR operation between two vectors.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) MaskedXor(y Uint64x4, z Mask64x4) Uint64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedAdd(y Uint64x8, z Mask64x8) Uint64x8
+/* MaskedRoundSuppressExceptionWithPrecision */
-// And performs a masked bitwise AND operation between two vectors.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedAnd(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedAndNot(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedEqual(y Uint64x8, z Mask64x8) Mask64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedGreater(y Uint64x8, z Mask64x8) Mask64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedGreaterEqual(y Uint64x8, z Mask64x8) Mask64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// Less compares for less than.
-// Const Immediate = 1.
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedLess(y Uint64x8, z Mask64x8) Mask64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedLessEqual(y Uint64x8, z Mask64x8) Mask64x8
+/* MaskedRoundWithPrecision */
-// Max computes the maximum of corresponding elements.
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedMax(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
-// Min computes the minimum of corresponding elements.
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedMin(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedMulEvenWiden(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedNotEqual(y Uint64x8, z Mask64x8) Mask64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
-// Or performs a masked bitwise OR operation between two vectors.
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPORQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedOr(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
-// Sub subtracts corresponding elements of two vectors.
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
//
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedSub(y Uint64x8, z Mask64x8) Uint64x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) MaskedXor(y Uint64x8, z Mask64x8) Uint64x8
+/* MaskedSaturatedAdd */
-// Add adds corresponding elements of two vectors.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedAdd(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedSaturatedAdd(y Int8x16, z Mask8x16) Int8x16
-// Average computes the rounded average of corresponding elements.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedAverage(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedSaturatedAdd(y Int8x32, z Mask8x32) Int8x32
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedEqual(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedSaturatedAdd(y Int8x64, z Mask8x64) Int8x64
-// Greater compares for greater than.
-// Const Immediate = 6.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedGreater(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedGreaterEqual(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16
-// Less compares for less than.
-// Const Immediate = 1.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedLess(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedLessEqual(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSB is the signed-saturating byte add; unsigned lanes
+// would normally pair with VPADDUSB. Confirm the instruction listed for
+// Uint8x16 is intended.
+func (x Uint8x16) MaskedSaturatedAdd(y Uint8x16, z Mask8x16) Uint8x16
-// Max computes the maximum of corresponding elements.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedMax(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSB is the signed-saturating byte add; unsigned lanes
+// would normally pair with VPADDUSB. Confirm the instruction listed for
+// Uint8x32 is intended.
+func (x Uint8x32) MaskedSaturatedAdd(y Uint8x32, z Mask8x32) Uint8x32
-// Min computes the minimum of corresponding elements.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedMin(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSB is the signed-saturating byte add; unsigned lanes
+// would normally pair with VPADDUSB. Confirm the instruction listed for
+// Uint8x64 is intended.
+func (x Uint8x64) MaskedSaturatedAdd(y Uint8x64, z Mask8x64) Uint8x64
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedNotEqual(y Uint8x16, z Mask8x16) Mask8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSW is the signed-saturating word add; unsigned lanes
+// would normally pair with VPADDUSW. Confirm the instruction listed for
+// Uint16x8 is intended.
+func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8
// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedSaturatedAdd(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSW is the signed-saturating word add; unsigned lanes
+// would normally pair with VPADDUSW. Confirm the instruction listed for
+// Uint16x16 is intended.
+func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedSaturatedSub(y Uint8x16, z Mask8x16) Uint8x16
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+//
+// NOTE(review): VPADDSW is the signed-saturating word add; unsigned lanes
+// would normally pair with VPADDUSW. Confirm the instruction listed for
+// Uint16x32 is intended.
+func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedSub(y Uint8x16, z Mask8x16) Uint8x16
+/* MaskedSaturatedPairDotProdAccumulate */
-// Add adds corresponding elements of two vectors.
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedAdd(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
-// Average computes the rounded average of corresponding elements.
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedAverage(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedEqual(y Uint8x32, z Mask8x32) Mask8x32
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedGreater(y Uint8x32, z Mask8x32) Mask8x32
+/* MaskedSaturatedSub */
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedGreaterEqual(y Uint8x32, z Mask8x32) Mask8x32
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedSaturatedSub(y Int8x16, z Mask8x16) Int8x16
-// Less compares for less than.
-// Const Immediate = 1.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedLess(y Uint8x32, z Mask8x32) Mask8x32
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedSaturatedSub(y Int8x32, z Mask8x32) Int8x32
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedLessEqual(y Uint8x32, z Mask8x32) Mask8x32
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedSaturatedSub(y Int8x64, z Mask8x64) Int8x64
-// Max computes the maximum of corresponding elements.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedMax(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedSaturatedSub(y Int16x8, z Mask16x8) Int16x8
-// Min computes the minimum of corresponding elements.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedMin(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedSaturatedSub(y Int16x16, z Mask16x16) Int16x16
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedNotEqual(y Uint8x32, z Mask8x32) Mask8x32
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedSaturatedSub(y Int16x32, z Mask16x32) Int16x32
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedSaturatedAdd(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedSaturatedSub(y Uint8x16, z Mask8x16) Uint8x16
// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512EVEX
func (x Uint8x32) MaskedSaturatedSub(y Uint8x32, z Mask8x32) Uint8x32
-// Sub subtracts corresponding elements of two vectors.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedSub(y Uint8x32, z Mask8x32) Uint8x32
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
-// Add adds corresponding elements of two vectors.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPADDB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedAdd(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8
-// Average computes the rounded average of corresponding elements.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedAverage(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
-// Equal compares for equality, masked.
-// Const Immediate = 0.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedEqual(y Uint8x64, z Mask8x64) Mask8x64
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedGreater(y Uint8x64, z Mask8x64) Mask8x64
+/* MaskedSaturatedUnsignedSignedPairDotProd */
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
+// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedGreaterEqual(y Uint8x64, z Mask8x64) Mask8x64
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
-// Less compares for less than.
-// Const Immediate = 1.
+// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedLess(y Uint8x64, z Mask8x64) Mask8x64
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
+// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedLessEqual(y Uint8x64, z Mask8x64) Mask8x64
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedMax(y Uint8x64, z Mask8x64) Uint8x64
+/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */
-// Min computes the minimum of corresponding elements.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedMin(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
-// NotEqual compares for inequality.
-// Const Immediate = 4.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedNotEqual(y Uint8x64, z Mask8x64) Mask8x64
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedSaturatedAdd(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
-// Sub subtracts corresponding elements of two vectors.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+/* MaskedSqrt */
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedSqrt(y Mask32x4) Float32x4
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAddSub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedSqrt(y Mask32x8) Float32x8
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAddSub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedSqrt(y Mask32x16) Float32x16
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplyAddSub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedSqrt(y Mask64x2) Float64x2
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedSqrt(y Mask64x4) Float64x4
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// Sqrt computes the square root of each element.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedSqrt(y Mask64x8) Float64x8
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
-//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+/* MaskedSub */
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySubAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedSub(y Float32x4, z Mask32x4) Float32x4
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySubAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedSub(y Float32x8, z Mask32x8) Float32x8
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedMultiplySubAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedSub(y Float32x16, z Mask32x16) Float32x16
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedSub(y Float64x2, z Mask64x2) Float64x2
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedSub(y Float64x4, z Mask64x4) Float64x4
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedSub(y Float64x8, z Mask64x8) Float64x8
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedSub(y Int8x16, z Mask8x16) Int8x16
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedSub(y Int8x32, z Mask8x32) Int8x32
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFusedNegativeMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedSub(y Int8x64, z Mask8x64) Int8x64
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedSub(y Int16x8, z Mask16x8) Int16x8
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedSub(y Int16x16, z Mask16x16) Int16x16
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedSub(y Int16x32, z Mask16x32) Int16x32
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAddSub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAddSub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplyAddSub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedSub(y Int64x2, z Mask64x2) Int64x2
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedSub(y Int64x4, z Mask64x4) Int64x4
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedSub(y Int64x8, z Mask64x8) Int64x8
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySubAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedSub(y Uint8x16, z Mask8x16) Uint8x16
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySubAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedSub(y Uint8x32, z Mask8x32) Uint8x32
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedMultiplySubAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFusedNegativeMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedSub(y Uint64x2, z Mask64x2) Uint64x2
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedSub(y Uint64x4, z Mask64x4) Uint64x4
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VFMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedSub(y Uint64x8, z Mask64x8) Uint64x8
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
-//
-// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAddSub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+/* MaskedTruncSuppressExceptionWithPrecision */
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAddSub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplyAddSub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
+// Const Immediate = 11.
//
-// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySubAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySubAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+/* MaskedTruncWithPrecision */
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedMultiplySubAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// TruncWithPrecision truncates elements with specified precision.
+// Const Immediate = 3.
//
-// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
-//
-// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFusedNegativeMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+/* MaskedUnsignedSignedQuadDotProdAccumulate */
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAddSub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x, masked by u.
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAddSub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x, masked by u.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplyAddSub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
-//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+/* MaskedXor */
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedXor(y Float32x4, z Mask32x4) Float32x4
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedXor(y Float32x8, z Mask32x8) Float32x8
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySubAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedXor(y Float32x16, z Mask32x16) Float32x16
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySubAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedXor(y Float64x2, z Mask64x2) Float64x2
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedMultiplySubAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedXor(y Float64x4, z Mask64x4) Float64x4
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VXORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedXor(y Float64x8, z Mask64x8) Float64x8
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MaskedXor(y Int64x2, z Mask64x2) Int64x2
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFusedNegativeMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedXor(y Int64x4, z Mask64x4) Int64x4
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedXor(y Int64x8, z Mask64x8) Int64x8
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAddSub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAddSub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedXor(y Uint64x2, z Mask64x2) Uint64x2
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplyAddSub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedXor(y Uint64x4, z Mask64x4) Uint64x4
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// MaskedXor performs a masked bitwise XOR operation between two vectors.
//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedXor(y Uint64x8, z Mask64x8) Uint64x8
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
-//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+/* Max */
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x4) Max(y Float32x4) Float32x4
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySubAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x8) Max(y Float32x8) Float32x8
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySubAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Max(y Float32x16) Float32x16
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedMultiplySubAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x2) Max(y Float64x2) Float64x2
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x4) Max(y Float64x4) Float64x4
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VMAXPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Max(y Float64x8) Float64x8
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPMAXSB, CPU Feature: AVX
+func (x Int8x16) Max(y Int8x16) Int8x16
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPMAXSB, CPU Feature: AVX2
+func (x Int8x32) Max(y Int8x32) Int8x32
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+func (x Int8x64) Max(y Int8x64) Int8x64
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFusedNegativeMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+// Asm: VPMAXSW, CPU Feature: AVX
+func (x Int16x8) Max(y Int16x8) Int16x8
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSW, CPU Feature: AVX2
+func (x Int16x16) Max(y Int16x16) Int16x16
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+func (x Int16x32) Max(y Int16x32) Int16x32
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSD, CPU Feature: AVX
+func (x Int32x4) Max(y Int32x4) Int32x4
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAddSub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSD, CPU Feature: AVX2
+func (x Int32x8) Max(y Int32x8) Int32x8
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAddSub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+func (x Int32x16) Max(y Int32x16) Int32x16
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplyAddSub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) Max(y Int64x2) Int64x2
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) Max(y Int64x4) Int64x4
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Max(y Int64x8) Int64x8
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUB, CPU Feature: AVX
+func (x Uint8x16) Max(y Uint8x16) Uint8x16
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySubAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUB, CPU Feature: AVX2
+func (x Uint8x32) Max(y Uint8x32) Uint8x32
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySubAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Max(y Uint8x64) Uint8x64
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedMultiplySubAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUW, CPU Feature: AVX
+func (x Uint16x8) Max(y Uint16x8) Uint16x8
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUW, CPU Feature: AVX2
+func (x Uint16x16) Max(y Uint16x16) Uint16x16
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Max(y Uint16x32) Uint16x32
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUD, CPU Feature: AVX
+func (x Uint32x4) Max(y Uint32x4) Uint32x4
-// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUD, CPU Feature: AVX2
+func (x Uint32x8) Max(y Uint32x8) Uint32x8
-// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Max(y Uint32x16) Uint32x16
-// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFusedNegativeMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Max(y Uint64x2) Uint64x2
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Max(y Uint64x4) Uint64x4
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Max computes the maximum of corresponding elements.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Max(y Uint64x8) Uint64x8
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+/* Min */
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x4) Min(y Float32x4) Float32x4
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x8) Min(y Float32x8) Float32x8
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+// Asm: VMINPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Min(y Float32x16) Float32x16
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x2) Min(y Float64x2) Float64x2
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x4) Min(y Float64x4) Float64x4
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+// Asm: VMINPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Min(y Float64x8) Float64x8
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+// Asm: VPMINSB, CPU Feature: AVX
+func (x Int8x16) Min(y Int8x16) Int8x16
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+// Asm: VPMINSB, CPU Feature: AVX2
+func (x Int8x32) Min(y Int8x32) Int8x32
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+// Asm: VPMINSB, CPU Feature: AVX512EVEX
+func (x Int8x64) Min(y Int8x64) Int8x64
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+// Asm: VPMINSW, CPU Feature: AVX
+func (x Int16x8) Min(y Int16x8) Int16x8
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+// Asm: VPMINSW, CPU Feature: AVX2
+func (x Int16x16) Min(y Int16x16) Int16x16
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+// Asm: VPMINSW, CPU Feature: AVX512EVEX
+func (x Int16x32) Min(y Int16x32) Int16x32
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+// Asm: VPMINSD, CPU Feature: AVX
+func (x Int32x4) Min(y Int32x4) Int32x4
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+// Asm: VPMINSD, CPU Feature: AVX2
+func (x Int32x8) Min(y Int32x8) Int32x8
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+// Asm: VPMINSD, CPU Feature: AVX512EVEX
+func (x Int32x16) Min(y Int32x16) Int32x16
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x2) Min(y Int64x2) Int64x2
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) Min(y Int64x4) Int64x4
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Min(y Int64x8) Int64x8
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+// Asm: VPMINUB, CPU Feature: AVX
+func (x Uint8x16) Min(y Uint8x16) Uint8x16
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VPMINUB, CPU Feature: AVX2
+func (x Uint8x32) Min(y Uint8x32) Uint8x32
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+// Asm: VPMINUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Min(y Uint8x64) Uint8x64
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16
+// Asm: VPMINUW, CPU Feature: AVX
+func (x Uint16x8) Min(y Uint16x8) Uint16x8
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4
+// Asm: VPMINUW, CPU Feature: AVX2
+func (x Uint16x16) Min(y Uint16x16) Uint16x16
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8
+// Asm: VPMINUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Min(y Uint16x32) Uint16x32
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2
+// Asm: VPMINUD, CPU Feature: AVX
+func (x Uint32x4) Min(y Uint32x4) Uint32x4
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4
+// Asm: VPMINUD, CPU Feature: AVX2
+func (x Uint32x8) Min(y Uint32x8) Uint32x8
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
+// Asm: VPMINUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Min(y Uint32x16) Uint32x16
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Min(y Uint64x2) Uint64x2
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Min(y Uint64x4) Uint64x4
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Min computes the minimum of corresponding elements.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Min(y Uint64x8) Uint64x8
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+/* Mul */
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x4) Mul(y Float32x4) Float32x4
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x8) Mul(y Float32x8) Float32x8
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16
+// Asm: VMULPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Mul(y Float32x16) Float32x16
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x2) Mul(y Float64x2) Float64x2
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x4) Mul(y Float64x4) Float64x4
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2
+// Asm: VMULPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Mul(y Float64x8) Float64x8
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4
+/* MulByPowOf2 */
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// MulByPowOf2 multiplies elements by a power of 2.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+/* MulEvenWiden */
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16
+// Asm: VPMULDQ, CPU Feature: AVX
+func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4
+// Asm: VPMULDQ, CPU Feature: AVX2
+func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
+// Asm: VPMULUDQ, CPU Feature: AVX
+func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VPMULUDQ, CPU Feature: AVX2
+func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = x.Even[i] * y.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
+
+/* MulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX
+func (x Int16x8) MulHigh(y Int16x8) Int16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX2
+func (x Int16x16) MulHigh(y Int16x16) Int16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX512EVEX
+func (x Int16x32) MulHigh(y Int16x32) Int16x32
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX
+func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX2
+func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
+
+/* MulLow */
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Int16x8) MulLow(y Int16x8) Int16x8
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Int16x16) MulLow(y Int16x16) Int16x16
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX512EVEX
+func (x Int16x32) MulLow(y Int16x32) Int16x32
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Int32x4) MulLow(y Int32x4) Int32x4
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Int32x8) MulLow(y Int32x8) Int32x8
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX512EVEX
+func (x Int32x16) MulLow(y Int32x16) Int32x16
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MulLow(y Int64x2) Int64x2
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MulLow(y Int64x4) Int64x4
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MulLow(y Int64x8) Int64x8
+
+/* NotEqual */
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) NotEqual(y Float32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) NotEqual(y Float32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) NotEqual(y Float32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) NotEqual(y Float64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) NotEqual(y Float64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) NotEqual(y Float64x8) Mask64x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) NotEqual(y Int8x16) Mask8x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) NotEqual(y Int8x32) Mask8x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) NotEqual(y Int8x64) Mask8x64
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) NotEqual(y Int16x8) Mask16x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) NotEqual(y Int16x16) Mask16x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) NotEqual(y Int32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) NotEqual(y Int32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) NotEqual(y Int32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) NotEqual(y Int64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) NotEqual(y Int64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) NotEqual(y Int64x8) Mask64x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
+
+/* Or */
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x4) Or(y Float32x4) Float32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x8) Or(y Float32x8) Float32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Or(y Float32x16) Float32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX
+func (x Float64x2) Or(y Float64x2) Float64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX
+func (x Float64x4) Or(y Float64x4) Float64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Or(y Float64x8) Float64x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int8x16) Or(y Int8x16) Int8x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int8x32) Or(y Int8x32) Int8x32
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int16x8) Or(y Int16x8) Int16x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int16x16) Or(y Int16x16) Int16x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int32x4) Or(y Int32x4) Int32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int32x8) Or(y Int32x8) Int32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Int32x16) Or(y Int32x16) Int32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int64x2) Or(y Int64x2) Int64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int64x4) Or(y Int64x4) Int64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Or(y Int64x8) Int64x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint8x16) Or(y Uint8x16) Uint8x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint8x32) Or(y Uint8x32) Uint8x32
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint16x8) Or(y Uint16x8) Uint16x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint16x16) Or(y Uint16x16) Uint16x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint32x4) Or(y Uint32x4) Uint32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint32x8) Or(y Uint32x8) Uint32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Or(y Uint32x16) Uint32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint64x2) Or(y Uint64x2) Uint64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint64x4) Or(y Uint64x4) Uint64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Or(y Uint64x8) Uint64x8
+
+/* PairDotProd */
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) PairDotProd(y Int16x8) Int32x4
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX2
+func (x Int16x16) PairDotProd(y Int16x16) Int32x8
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x32) PairDotProd(y Int16x32) Int32x16
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+/* PairDotProdAccumulate */
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+/* PairwiseAdd */
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+/* PairwiseSub */
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x4) PairwiseSub(y Float32x4) Float32x4
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x8) PairwiseSub(y Float32x8) Float32x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x2) PairwiseSub(y Float64x2) Float64x2
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x4) PairwiseSub(y Float64x4) Float64x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Int16x8) PairwiseSub(y Int16x8) Int16x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Int16x16) PairwiseSub(y Int16x16) Int16x16
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Int32x4) PairwiseSub(y Int32x4) Int32x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Int32x8) PairwiseSub(y Int32x8) Int32x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
+
+/* PopCount */
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x16) PopCount() Int8x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x32) PopCount() Int8x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x64) PopCount() Int8x64
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x8) PopCount() Int16x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x16) PopCount() Int16x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x32) PopCount() Int16x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x4) PopCount() Int32x4
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x8) PopCount() Int32x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x16) PopCount() Int32x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x2) PopCount() Int64x2
+
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x4) PopCount() Int64x4
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x8) PopCount() Int64x8
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x16) PopCount() Uint8x16
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x32) PopCount() Uint8x32
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x64) PopCount() Uint8x64
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x8) PopCount() Uint16x8
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x16) PopCount() Uint16x16
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x32) PopCount() Uint16x32
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x4) PopCount() Uint32x4
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x8) PopCount() Uint32x8
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x16) PopCount() Uint32x16
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) PopCount() Uint64x2
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) PopCount() Uint64x4
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// PopCount counts the number of set bits in each element.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) PopCount() Uint64x8
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+/* Round */
+
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Round() Float32x4
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Round() Float32x8
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Round() Float64x2
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Round() Float64x4
+
+/* RoundSuppressExceptionWithPrecision */
// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
// Const Immediate = 8.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
// Const Immediate = 8.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
// Const Immediate = 8.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
// Const Immediate = 8.
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+/* RoundWithPrecision */
+
// RoundWithPrecision rounds elements with specified precision.
// Const Immediate = 0.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
+func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
// RoundWithPrecision rounds elements with specified precision.
// Const Immediate = 0.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
+func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
// RoundWithPrecision rounds elements with specified precision.
// Const Immediate = 0.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
+func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
// RoundWithPrecision rounds elements with specified precision.
// Const Immediate = 0.
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
-
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
-
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
-
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
-
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
-
-// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
-// Const Immediate = 11.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
-
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16
-
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4
-
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8
+/* SaturatedAdd */
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32
-// TruncWithPrecision truncates elements with specified precision.
-// Const Immediate = 3.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
-
-// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+/* SaturatedPairDotProdAccumulate */
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
-// CeilWithPrecision rounds elements up with specified precision, masked.
-// Const Immediate = 2.
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+/* SaturatedPairwiseAdd */
+
+// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPHADDSW, CPU Feature: AVX
+func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPHADDSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+/* SaturatedPairwiseSub */
+
+// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPHSUBSW, CPU Feature: AVX
+func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPHSUBSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+/* SaturatedSub */
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Int8x16) SaturatedSub(y Int8x16) Int8x16
-// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
-// Const Immediate = 10.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Int8x32) SaturatedSub(y Int8x32) Int8x32
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Int8x64) SaturatedSub(y Int8x64) Int8x64
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Int16x8) SaturatedSub(y Int16x8) Int16x8
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedSub(y Int16x16) Int16x16
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Int16x32) SaturatedSub(y Int16x32) Int16x32
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-// Const Immediate = 2.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+/* SaturatedUnsignedSignedPairDotProd */
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPMADDUBSW, CPU Feature: AVX
+func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
-// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
-// Const Immediate = 9.
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPMADDUBSW, CPU Feature: AVX2
+func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+/* SaturatedUnsignedSignedQuadDotProdAccumulate */
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-// Const Immediate = 1.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
+/* Sign */
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNB, CPU Feature: AVX
+func (x Int8x16) Sign(y Int8x16) Int8x16
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSIGNB, CPU Feature: AVX2
+func (x Int8x32) Sign(y Int8x32) Int8x32
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSIGNW, CPU Feature: AVX
+func (x Int16x8) Sign(y Int16x8) Int16x8
-// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSIGNW, CPU Feature: AVX2
+func (x Int16x16) Sign(y Int16x16) Int16x16
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSIGND, CPU Feature: AVX
+func (x Int32x4) Sign(y Int32x4) Int32x4
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSIGND, CPU Feature: AVX2
+func (x Int32x8) Sign(y Int32x8) Int32x8
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+/* Sqrt */
+
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VSQRTPS, CPU Feature: AVX
+func (x Float32x4) Sqrt() Float32x4
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VSQRTPS, CPU Feature: AVX
+func (x Float32x8) Sqrt() Float32x8
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Sqrt() Float32x16
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-// Const Immediate = 0.
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VSQRTPD, CPU Feature: AVX
+func (x Float64x2) Sqrt() Float64x2
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VSQRTPD, CPU Feature: AVX
+func (x Float64x4) Sqrt() Float64x4
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// Sqrt computes the square root of each element.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Sqrt() Float64x8
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+/* Sub */
+
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VSUBPS, CPU Feature: AVX
+func (x Float32x4) Sub(y Float32x4) Float32x4
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VSUBPS, CPU Feature: AVX
+func (x Float32x8) Sub(y Float32x8) Float32x8
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Sub(y Float32x16) Float32x16
-// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
-// Const Immediate = 11.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VSUBPD, CPU Feature: AVX
+func (x Float64x2) Sub(y Float64x2) Float64x2
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VSUBPD, CPU Feature: AVX
+func (x Float64x4) Sub(y Float64x4) Float64x4
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Sub(y Float64x8) Float64x8
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBB, CPU Feature: AVX
+func (x Int8x16) Sub(y Int8x16) Int8x16
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBB, CPU Feature: AVX2
+func (x Int8x32) Sub(y Int8x32) Int8x32
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x64) Sub(y Int8x64) Int8x64
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-// Const Immediate = 3.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBW, CPU Feature: AVX
+func (x Int16x8) Sub(y Int16x8) Int16x8
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBW, CPU Feature: AVX2
+func (x Int16x16) Sub(y Int16x16) Int16x16
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Int16x32) Sub(y Int16x32) Int16x32
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBD, CPU Feature: AVX
+func (x Int32x4) Sub(y Int32x4) Int32x4
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBD, CPU Feature: AVX2
+func (x Int32x8) Sub(y Int32x8) Int32x8
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Int32x16) Sub(y Int32x16) Int32x16
-// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
-// Const Immediate = 9.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBQ, CPU Feature: AVX
+func (x Int64x2) Sub(y Int64x2) Int64x2
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBQ, CPU Feature: AVX2
+func (x Int64x4) Sub(y Int64x4) Int64x4
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Sub(y Int64x8) Int64x8
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBB, CPU Feature: AVX
+func (x Uint8x16) Sub(y Uint8x16) Uint8x16
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBB, CPU Feature: AVX2
+func (x Uint8x32) Sub(y Uint8x32) Uint8x32
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Sub(y Uint8x64) Uint8x64
-// FloorWithPrecision rounds elements down with specified precision, masked.
-// Const Immediate = 1.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBW, CPU Feature: AVX
+func (x Uint16x8) Sub(y Uint16x8) Uint16x8
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBW, CPU Feature: AVX2
+func (x Uint16x16) Sub(y Uint16x16) Uint16x16
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Sub(y Uint16x32) Uint16x32
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VPSUBD, CPU Feature: AVX
+func (x Uint32x4) Sub(y Uint32x4) Uint32x4
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VPSUBD, CPU Feature: AVX2
+func (x Uint32x8) Sub(y Uint32x8) Uint32x8
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VPSUBD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Sub(y Uint32x16) Uint32x16
-// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
-// Const Immediate = 8.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VPSUBQ, CPU Feature: AVX
+func (x Uint64x2) Sub(y Uint64x2) Uint64x2
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+// Asm: VPSUBQ, CPU Feature: AVX2
+func (x Uint64x4) Sub(y Uint64x4) Uint64x4
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+// Sub subtracts corresponding elements of two vectors.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Sub(y Uint64x8) Uint64x8
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+/* Trunc */
+
+// Trunc truncates elements towards zero.
+// Const Immediate = 3.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Trunc() Float32x4
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+// Trunc truncates elements towards zero.
+// Const Immediate = 3.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Trunc() Float32x8
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+// Trunc truncates elements towards zero.
+// Const Immediate = 3.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Trunc() Float64x2
-// RoundWithPrecision rounds elements with specified precision.
-// Const Immediate = 0.
+// Trunc truncates elements towards zero.
+// Const Immediate = 3.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Trunc() Float64x4
+
+/* TruncSuppressExceptionWithPrecision */
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
// Const Immediate = 11.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+/* TruncWithPrecision */
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
+func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
+func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4
// TruncWithPrecision truncates elements with specified precision.
// Const Immediate = 3.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
+func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+
+/* UnsignedSignedQuadDotProdAccumulate */
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
+/* Xor */
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX
+func (x Float32x4) Xor(y Float32x4) Float32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX
+func (x Float32x8) Xor(y Float32x8) Float32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Xor(y Float32x16) Float32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX
+func (x Float64x2) Xor(y Float64x2) Float64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX
+func (x Float64x4) Xor(y Float64x4) Float64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Xor(y Float64x8) Float64x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int8x16) Xor(y Int8x16) Int8x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int8x32) Xor(y Int8x32) Int8x32
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int16x8) Xor(y Int16x8) Int16x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int16x16) Xor(y Int16x16) Int16x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int32x4) Xor(y Int32x4) Int32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int32x8) Xor(y Int32x8) Int32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Int32x16) Xor(y Int32x16) Int32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int64x2) Xor(y Int64x2) Int64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int64x4) Xor(y Int64x4) Int64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Xor(y Int64x8) Int64x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint8x16) Xor(y Uint8x16) Uint8x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint8x32) Xor(y Uint8x32) Uint8x32
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint16x8) Xor(y Uint16x8) Uint16x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint16x16) Xor(y Uint16x16) Uint16x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint32x4) Xor(y Uint32x4) Uint32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint32x8) Xor(y Uint32x8) Uint32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Xor(y Uint32x16) Uint32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint64x2) Xor(y Uint64x2) Uint64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint64x4) Xor(y Uint64x4) Uint64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Xor(y Uint64x8) Uint64x8
// Float64x8 converts from Float32x16 to Float64x8
func (from Float32x16) AsFloat64x8() (to Float64x8)