From 17baae72db6f31275383ecb091ee3ec722e290ad Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 19:44:57 +0000 Subject: [PATCH] [dev.simd] simd: default mask param's name to mask This CL is generated by CL 687920. Change-Id: Iab0d7c28c923380df51806ba572ec59f9b031de8 Reviewed-on: https://go-review.googlesource.com/c/go/+/687955 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/simd/ops_amd64.go | 1632 ++++++++++++++++++++--------------------- 1 file changed, 816 insertions(+), 816 deletions(-) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 29899f8cb1..ebb626358f 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -71,62 +71,62 @@ func (x Int64x8) Absolute() Int64x8 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX512BW -func (x Int8x16) AbsoluteMasked(y Mask8x16) Int8x16 +func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX512BW -func (x Int8x32) AbsoluteMasked(y Mask8x32) Int8x32 +func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX512BW -func (x Int8x64) AbsoluteMasked(y Mask8x64) Int8x64 +func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSW, CPU Feature: AVX512BW -func (x Int16x8) AbsoluteMasked(y Mask16x8) Int16x8 +func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSW, CPU Feature: AVX512BW -func (x Int16x16) AbsoluteMasked(y Mask16x16) Int16x16 +func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSW, CPU Feature: AVX512BW -func (x Int16x32) AbsoluteMasked(y Mask16x32) Int16x32 +func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSD, CPU Feature: AVX512F -func (x Int32x4) AbsoluteMasked(y Mask32x4) Int32x4 +func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSD, CPU Feature: AVX512F -func (x Int32x8) AbsoluteMasked(y Mask32x8) Int32x8 +func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSD, CPU Feature: AVX512F -func (x Int32x16) AbsoluteMasked(y Mask32x16) Int32x16 +func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSQ, CPU Feature: AVX512F -func (x Int64x2) AbsoluteMasked(y Mask64x2) Int64x2 +func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSQ, CPU Feature: AVX512F -func (x Int64x4) AbsoluteMasked(y Mask64x4) Int64x4 +func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4 // AbsoluteMasked computes the absolute value of each element. // // Asm: VPABSQ, CPU Feature: AVX512F -func (x Int64x8) AbsoluteMasked(y Mask64x8) Int64x8 +func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8 /* Add */ @@ -285,152 +285,152 @@ func (x Uint64x8) Add(y Uint64x8) Uint64x8 // AddMasked adds corresponding elements of two vectors. 
// // Asm: VADDPS, CPU Feature: AVX512F -func (x Float32x4) AddMasked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VADDPS, CPU Feature: AVX512F -func (x Float32x8) AddMasked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VADDPS, CPU Feature: AVX512F -func (x Float32x16) AddMasked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VADDPD, CPU Feature: AVX512F -func (x Float64x2) AddMasked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2 // AddMasked adds corresponding elements of two vectors. // // Asm: VADDPD, CPU Feature: AVX512F -func (x Float64x4) AddMasked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VADDPD, CPU Feature: AVX512F -func (x Float64x8) AddMasked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Int8x16) AddMasked(y Int8x16, z Mask8x16) Int8x16 +func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Int8x32) AddMasked(y Int8x32, z Mask8x32) Int8x32 +func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Int8x64) AddMasked(y Int8x64, z Mask8x64) Int8x64 +func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Int16x8) AddMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Int16x16) AddMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Int16x32) AddMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Int32x4) AddMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Int32x8) AddMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Int32x16) AddMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16 // AddMasked adds corresponding elements of two vectors. 
// // Asm: VPADDQ, CPU Feature: AVX512F -func (x Int64x2) AddMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDQ, CPU Feature: AVX512F -func (x Int64x4) AddMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDQ, CPU Feature: AVX512F -func (x Int64x8) AddMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Uint8x16) AddMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Uint8x32) AddMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDB, CPU Feature: AVX512BW -func (x Uint8x64) AddMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Uint16x8) AddMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Uint16x16) AddMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDW, CPU Feature: AVX512BW -func (x Uint16x32) AddMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Uint32x4) AddMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Uint32x8) AddMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDD, CPU Feature: AVX512F -func (x Uint32x16) AddMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDQ, CPU Feature: AVX512F -func (x Uint64x2) AddMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDQ, CPU Feature: AVX512F -func (x Uint64x4) AddMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4 // AddMasked adds corresponding elements of two vectors. // // Asm: VPADDQ, CPU Feature: AVX512F -func (x Uint64x8) AddMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* AddSub */ @@ -561,62 +561,62 @@ func (x Uint64x8) And(y Uint64x8) Uint64x8 // AndMasked performs a masked bitwise AND operation between two vectors. 
// // Asm: VPANDD, CPU Feature: AVX512F -func (x Int32x4) AndMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F -func (x Int32x8) AndMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F -func (x Int32x16) AndMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Int64x2) AndMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Int64x4) AndMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Int64x8) AndMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F -func (x Uint32x4) AndMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F -func (x Uint32x8) AndMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F -func (x Uint32x16) AndMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Uint64x2) AndMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Uint64x4) AndMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4 // AndMasked performs a masked bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F -func (x Uint64x8) AndMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* AndNot */ @@ -725,62 +725,62 @@ func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDND, CPU Feature: AVX512F -func (x Int32x4) AndNotMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDND, CPU Feature: AVX512F -func (x Int32x8) AndNotMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. 
// // Asm: VPANDND, CPU Feature: AVX512F -func (x Int32x16) AndNotMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Int64x2) AndNotMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Int64x4) AndNotMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Int64x8) AndNotMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDND, CPU Feature: AVX512F -func (x Uint32x4) AndNotMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDND, CPU Feature: AVX512F -func (x Uint32x8) AndNotMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDND, CPU Feature: AVX512F -func (x Uint32x16) AndNotMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Uint64x2) AndNotMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Uint64x4) AndNotMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. // // Asm: VPANDNQ, CPU Feature: AVX512F -func (x Uint64x8) AndNotMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* ApproximateReciprocal */ @@ -819,32 +819,32 @@ func (x Float64x8) ApproximateReciprocal() Float64x8 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // // Asm: VRCP14PS, CPU Feature: AVX512F -func (x Float32x4) ApproximateReciprocalMasked(y Mask32x4) Float32x4 +func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // // Asm: VRCP14PS, CPU Feature: AVX512F -func (x Float32x8) ApproximateReciprocalMasked(y Mask32x8) Float32x8 +func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // // Asm: VRCP14PS, CPU Feature: AVX512F -func (x Float32x16) ApproximateReciprocalMasked(y Mask32x16) Float32x16 +func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. 
// // Asm: VRCP14PD, CPU Feature: AVX512F -func (x Float64x2) ApproximateReciprocalMasked(y Mask64x2) Float64x2 +func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // // Asm: VRCP14PD, CPU Feature: AVX512F -func (x Float64x4) ApproximateReciprocalMasked(y Mask64x4) Float64x4 +func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // // Asm: VRCP14PD, CPU Feature: AVX512F -func (x Float64x8) ApproximateReciprocalMasked(y Mask64x8) Float64x8 +func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8 /* ApproximateReciprocalOfSqrt */ @@ -883,32 +883,32 @@ func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PS, CPU Feature: AVX512F -func (x Float32x4) ApproximateReciprocalOfSqrtMasked(y Mask32x4) Float32x4 +func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PS, CPU Feature: AVX512F -func (x Float32x8) ApproximateReciprocalOfSqrtMasked(y Mask32x8) Float32x8 +func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PS, CPU Feature: AVX512F -func (x Float32x16) ApproximateReciprocalOfSqrtMasked(y Mask32x16) Float32x16 +func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PD, CPU Feature: AVX512F -func (x Float64x2) ApproximateReciprocalOfSqrtMasked(y Mask64x2) Float64x2 +func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PD, CPU Feature: AVX512F -func (x Float64x4) ApproximateReciprocalOfSqrtMasked(y Mask64x4) Float64x4 +func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRT14PD, CPU Feature: AVX512F -func (x Float64x8) ApproximateReciprocalOfSqrtMasked(y Mask64x8) Float64x8 +func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8 /* Average */ @@ -947,32 +947,32 @@ func (x Uint16x32) Average(y Uint16x32) Uint16x32 // AverageMasked computes the rounded average of corresponding elements. // // Asm: VPAVGB, CPU Feature: AVX512BW -func (x Uint8x16) AverageMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16 // AverageMasked computes the rounded average of corresponding elements. // // Asm: VPAVGB, CPU Feature: AVX512BW -func (x Uint8x32) AverageMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32 // AverageMasked computes the rounded average of corresponding elements. 
// // Asm: VPAVGB, CPU Feature: AVX512BW -func (x Uint8x64) AverageMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64 // AverageMasked computes the rounded average of corresponding elements. // // Asm: VPAVGW, CPU Feature: AVX512BW -func (x Uint16x8) AverageMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8 // AverageMasked computes the rounded average of corresponding elements. // // Asm: VPAVGW, CPU Feature: AVX512BW -func (x Uint16x16) AverageMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16 // AverageMasked computes the rounded average of corresponding elements. // // Asm: VPAVGW, CPU Feature: AVX512BW -func (x Uint16x32) AverageMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32 /* Ceil */ @@ -1047,42 +1047,42 @@ func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x4) CeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // CeilWithPrecisionMasked rounds elements up with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x8) CeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // CeilWithPrecisionMasked rounds elements up with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x16) CeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // CeilWithPrecisionMasked rounds elements up with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x2) CeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // CeilWithPrecisionMasked rounds elements up with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x4) CeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // CeilWithPrecisionMasked rounds elements up with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x8) CeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithCeilWithPrecision */ @@ -1135,42 +1135,42 @@ func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithFloorWithPrecision */ @@ -1223,42 +1223,42 @@ func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithRoundWithPrecision */ @@ -1311,42 +1311,42 @@ func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithTruncWithPrecision */ @@ -1399,42 +1399,42 @@ func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ -func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ -func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* Div */ @@ -1473,32 +1473,32 @@ func (x Float64x8) Div(y Float64x8) Float64x8 // DivMasked divides elements of two vectors. // // Asm: VDIVPS, CPU Feature: AVX512F -func (x Float32x4) DivMasked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4 // DivMasked divides elements of two vectors. 
// // Asm: VDIVPS, CPU Feature: AVX512F -func (x Float32x8) DivMasked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8 // DivMasked divides elements of two vectors. // // Asm: VDIVPS, CPU Feature: AVX512F -func (x Float32x16) DivMasked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16 // DivMasked divides elements of two vectors. // // Asm: VDIVPD, CPU Feature: AVX512F -func (x Float64x2) DivMasked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2 // DivMasked divides elements of two vectors. // // Asm: VDIVPD, CPU Feature: AVX512F -func (x Float64x4) DivMasked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4 // DivMasked divides elements of two vectors. // // Asm: VDIVPD, CPU Feature: AVX512F -func (x Float64x8) DivMasked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8 /* DotProdBroadcast */ @@ -1674,152 +1674,152 @@ func (x Uint64x8) Equal(y Uint64x8) Mask64x8 // EqualMasked compares for equality, masked. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) EqualMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality, masked. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) EqualMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality, masked. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) EqualMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality, masked. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) EqualMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality, masked. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) EqualMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality, masked. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) EqualMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // EqualMasked compares for equality, masked. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) EqualMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) EqualMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // EqualMasked compares for equality, masked. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) EqualMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // EqualMasked compares for equality, masked. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) EqualMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // EqualMasked compares for equality, masked. 
// // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) EqualMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) EqualMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // EqualMasked compares for equality, masked. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) EqualMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality, masked. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) EqualMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality, masked. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) EqualMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) EqualMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality, masked. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) EqualMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality, masked. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) EqualMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // EqualMasked compares for equality, masked. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) EqualMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) EqualMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // EqualMasked compares for equality, masked. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) EqualMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // EqualMasked compares for equality, masked. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) EqualMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // EqualMasked compares for equality, masked. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) EqualMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) EqualMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // EqualMasked compares for equality, masked. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) EqualMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality, masked. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) EqualMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality, masked. 
// // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) EqualMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality, masked. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) EqualMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality, masked. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) EqualMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality, masked. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) EqualMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* Floor */ @@ -1894,42 +1894,42 @@ func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x4) FloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // FloorWithPrecisionMasked rounds elements down with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x8) FloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // FloorWithPrecisionMasked rounds elements down with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x16) FloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // FloorWithPrecisionMasked rounds elements down with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x2) FloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // FloorWithPrecisionMasked rounds elements down with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x4) FloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // FloorWithPrecisionMasked rounds elements down with specified precision, masked. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x8) FloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* FusedMultiplyAdd */ @@ -1968,32 +1968,32 @@ func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. 
// // Asm: VFMADD213PS, CPU Feature: AVX512F -func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 +func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // // Asm: VFMADD213PS, CPU Feature: AVX512F -func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 +func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // // Asm: VFMADD213PS, CPU Feature: AVX512F -func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 +func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // // Asm: VFMADD213PD, CPU Feature: AVX512F -func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 +func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // // Asm: VFMADD213PD, CPU Feature: AVX512F -func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 +func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // // Asm: VFMADD213PD, CPU Feature: AVX512F -func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 +func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* FusedMultiplyAddSub */ @@ -2032,32 +2032,32 @@ func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F -func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 +func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F -func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 +func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F -func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 +func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F -func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 +func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. 
// // Asm: VFMADDSUB213PD, CPU Feature: AVX512F -func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 +func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F -func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 +func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* FusedMultiplySubAdd */ @@ -2096,32 +2096,32 @@ func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F -func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 +func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F -func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 +func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F -func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 +func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F -func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 +func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F -func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 +func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F -func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 +func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* GaloisFieldAffineTransform */ @@ -2283,19 +2283,19 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. 
// // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 /* Get128 */ @@ -2736,304 +2736,304 @@ func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) GreaterEqualMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) GreaterEqualMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) GreaterEqualMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) GreaterEqualMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) GreaterEqualMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) GreaterEqualMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) GreaterEqualMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) GreaterEqualMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) GreaterEqualMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) GreaterEqualMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) GreaterEqualMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // GreaterEqualMasked compares for greater than or equal. 
// // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) GreaterEqualMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) GreaterEqualMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) GreaterEqualMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) GreaterEqualMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) GreaterEqualMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) GreaterEqualMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) GreaterEqualMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) GreaterEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) GreaterEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) GreaterEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) GreaterEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) GreaterEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) GreaterEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) GreaterEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. 
// // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) GreaterEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) GreaterEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) GreaterEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) GreaterEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) GreaterEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* GreaterMasked */ // GreaterMasked compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) GreaterMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) GreaterMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) GreaterMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) GreaterMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) GreaterMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) GreaterMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8 // GreaterMasked compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) GreaterMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16 // GreaterMasked compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) GreaterMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32 // GreaterMasked compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) GreaterMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64 // GreaterMasked compares for greater than. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) GreaterMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8 // GreaterMasked compares for greater than. 
// // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) GreaterMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16 // GreaterMasked compares for greater than. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) GreaterMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32 // GreaterMasked compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) GreaterMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) GreaterMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) GreaterMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) GreaterMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) GreaterMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) GreaterMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8 // GreaterMasked compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) GreaterMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16 // GreaterMasked compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) GreaterMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32 // GreaterMasked compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) GreaterMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64 // GreaterMasked compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) GreaterMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8 // GreaterMasked compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) GreaterMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16 // GreaterMasked compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) GreaterMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32 // GreaterMasked compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) GreaterMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) GreaterMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. 
// // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) GreaterMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) GreaterMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) GreaterMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) GreaterMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* IsNan */ @@ -3072,32 +3072,32 @@ func (x Float64x8) IsNan(y Float64x8) Mask64x8 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) IsNanMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) IsNanMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) IsNanMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) IsNanMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) IsNanMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) IsNanMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8 /* Less */ @@ -3408,304 +3408,304 @@ func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) LessEqualMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) LessEqualMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) LessEqualMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) LessEqualMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. 
// // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) LessEqualMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) LessEqualMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) LessEqualMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) LessEqualMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) LessEqualMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) LessEqualMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) LessEqualMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) LessEqualMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) LessEqualMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) LessEqualMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) LessEqualMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) LessEqualMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) LessEqualMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) LessEqualMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) LessEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // LessEqualMasked compares for less than or equal. 
// // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) LessEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) LessEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) LessEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) LessEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) LessEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) LessEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) LessEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) LessEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) LessEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) LessEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) LessEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* LessMasked */ // LessMasked compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) LessMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) LessMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) LessMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) LessMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. 
// // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) LessMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) LessMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8 // LessMasked compares for less than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) LessMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16 // LessMasked compares for less than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) LessMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32 // LessMasked compares for less than. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) LessMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64 // LessMasked compares for less than. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) LessMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8 // LessMasked compares for less than. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) LessMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16 // LessMasked compares for less than. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) LessMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32 // LessMasked compares for less than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) LessMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) LessMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) LessMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) LessMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) LessMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) LessMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8 // LessMasked compares for less than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) LessMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16 // LessMasked compares for less than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) LessMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32 // LessMasked compares for less than. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) LessMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64 // LessMasked compares for less than. 
// // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) LessMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8 // LessMasked compares for less than. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) LessMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16 // LessMasked compares for less than. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) LessMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32 // LessMasked compares for less than. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) LessMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) LessMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) LessMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) LessMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) LessMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) LessMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* Max */ @@ -3864,152 +3864,152 @@ func (x Uint64x8) Max(y Uint64x8) Uint64x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPS, CPU Feature: AVX512F -func (x Float32x4) MaxMasked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPS, CPU Feature: AVX512F -func (x Float32x8) MaxMasked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPS, CPU Feature: AVX512F -func (x Float32x16) MaxMasked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPD, CPU Feature: AVX512F -func (x Float64x2) MaxMasked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPD, CPU Feature: AVX512F -func (x Float64x4) MaxMasked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VMAXPD, CPU Feature: AVX512F -func (x Float64x8) MaxMasked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8 // MaxMasked computes the maximum of corresponding elements. 
// // Asm: VPMAXSB, CPU Feature: AVX512BW -func (x Int8x16) MaxMasked(y Int8x16, z Mask8x16) Int8x16 +func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSB, CPU Feature: AVX512BW -func (x Int8x32) MaxMasked(y Int8x32, z Mask8x32) Int8x32 +func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSB, CPU Feature: AVX512BW -func (x Int8x64) MaxMasked(y Int8x64, z Mask8x64) Int8x64 +func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSW, CPU Feature: AVX512BW -func (x Int16x8) MaxMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSW, CPU Feature: AVX512BW -func (x Int16x16) MaxMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSW, CPU Feature: AVX512BW -func (x Int16x32) MaxMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSD, CPU Feature: AVX512F -func (x Int32x4) MaxMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSD, CPU Feature: AVX512F -func (x Int32x8) MaxMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSD, CPU Feature: AVX512F -func (x Int32x16) MaxMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512F -func (x Int64x2) MaxMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512F -func (x Int64x4) MaxMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512F -func (x Int64x8) MaxMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUB, CPU Feature: AVX512BW -func (x Uint8x16) MaxMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUB, CPU Feature: AVX512BW -func (x Uint8x32) MaxMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUB, CPU Feature: AVX512BW -func (x Uint8x64) MaxMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64 // MaxMasked computes the maximum of corresponding elements. 
// // Asm: VPMAXUW, CPU Feature: AVX512BW -func (x Uint16x8) MaxMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUW, CPU Feature: AVX512BW -func (x Uint16x16) MaxMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUW, CPU Feature: AVX512BW -func (x Uint16x32) MaxMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUD, CPU Feature: AVX512F -func (x Uint32x4) MaxMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUD, CPU Feature: AVX512F -func (x Uint32x8) MaxMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUD, CPU Feature: AVX512F -func (x Uint32x16) MaxMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512F -func (x Uint64x2) MaxMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512F -func (x Uint64x4) MaxMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MaxMasked computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512F -func (x Uint64x8) MaxMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* Min */ @@ -4168,152 +4168,152 @@ func (x Uint64x8) Min(y Uint64x8) Uint64x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VMINPS, CPU Feature: AVX512F -func (x Float32x4) MinMasked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4 // MinMasked computes the minimum of corresponding elements. // // Asm: VMINPS, CPU Feature: AVX512F -func (x Float32x8) MinMasked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VMINPS, CPU Feature: AVX512F -func (x Float32x16) MinMasked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VMINPD, CPU Feature: AVX512F -func (x Float64x2) MinMasked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2 // MinMasked computes the minimum of corresponding elements. // // Asm: VMINPD, CPU Feature: AVX512F -func (x Float64x4) MinMasked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4 // MinMasked computes the minimum of corresponding elements. 
// // Asm: VMINPD, CPU Feature: AVX512F -func (x Float64x8) MinMasked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSB, CPU Feature: AVX512BW -func (x Int8x16) MinMasked(y Int8x16, z Mask8x16) Int8x16 +func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSB, CPU Feature: AVX512BW -func (x Int8x32) MinMasked(y Int8x32, z Mask8x32) Int8x32 +func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSB, CPU Feature: AVX512BW -func (x Int8x64) MinMasked(y Int8x64, z Mask8x64) Int8x64 +func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSW, CPU Feature: AVX512BW -func (x Int16x8) MinMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSW, CPU Feature: AVX512BW -func (x Int16x16) MinMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSW, CPU Feature: AVX512BW -func (x Int16x32) MinMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSD, CPU Feature: AVX512F -func (x Int32x4) MinMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSD, CPU Feature: AVX512F -func (x Int32x8) MinMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSD, CPU Feature: AVX512F -func (x Int32x16) MinMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSQ, CPU Feature: AVX512F -func (x Int64x2) MinMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSQ, CPU Feature: AVX512F -func (x Int64x4) MinMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINSQ, CPU Feature: AVX512F -func (x Int64x8) MinMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUB, CPU Feature: AVX512BW -func (x Uint8x16) MinMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUB, CPU Feature: AVX512BW -func (x Uint8x32) MinMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32 // MinMasked computes the minimum of corresponding elements. 
// // Asm: VPMINUB, CPU Feature: AVX512BW -func (x Uint8x64) MinMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUW, CPU Feature: AVX512BW -func (x Uint16x8) MinMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUW, CPU Feature: AVX512BW -func (x Uint16x16) MinMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUW, CPU Feature: AVX512BW -func (x Uint16x32) MinMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUD, CPU Feature: AVX512F -func (x Uint32x4) MinMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUD, CPU Feature: AVX512F -func (x Uint32x8) MinMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUD, CPU Feature: AVX512F -func (x Uint32x16) MinMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUQ, CPU Feature: AVX512F -func (x Uint64x2) MinMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUQ, CPU Feature: AVX512F -func (x Uint64x4) MinMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MinMasked computes the minimum of corresponding elements. // // Asm: VPMINUQ, CPU Feature: AVX512F -func (x Uint64x8) MinMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* Mul */ @@ -4384,32 +4384,32 @@ func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8 // MulByPowOf2Masked multiplies elements by a power of 2. // // Asm: VSCALEFPS, CPU Feature: AVX512F -func (x Float32x4) MulByPowOf2Masked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4 // MulByPowOf2Masked multiplies elements by a power of 2. // // Asm: VSCALEFPS, CPU Feature: AVX512F -func (x Float32x8) MulByPowOf2Masked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8 // MulByPowOf2Masked multiplies elements by a power of 2. // // Asm: VSCALEFPS, CPU Feature: AVX512F -func (x Float32x16) MulByPowOf2Masked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16 // MulByPowOf2Masked multiplies elements by a power of 2. // // Asm: VSCALEFPD, CPU Feature: AVX512F -func (x Float64x2) MulByPowOf2Masked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2 // MulByPowOf2Masked multiplies elements by a power of 2. 
// // Asm: VSCALEFPD, CPU Feature: AVX512F -func (x Float64x4) MulByPowOf2Masked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4 // MulByPowOf2Masked multiplies elements by a power of 2. // // Asm: VSCALEFPD, CPU Feature: AVX512F -func (x Float64x8) MulByPowOf2Masked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8 /* MulEvenWiden */ @@ -4479,37 +4479,37 @@ func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULDQ, CPU Feature: AVX512F -func (x Int64x2) MulEvenWidenMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULDQ, CPU Feature: AVX512F -func (x Int64x4) MulEvenWidenMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULDQ, CPU Feature: AVX512F -func (x Int64x8) MulEvenWidenMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F -func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F -func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F -func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* MulHigh */ @@ -4548,32 +4548,32 @@ func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 // MulHighMasked multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHW, CPU Feature: AVX512BW -func (x Int16x8) MulHighMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8 // MulHighMasked multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHW, CPU Feature: AVX512BW -func (x Int16x16) MulHighMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16 // MulHighMasked multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHW, CPU Feature: AVX512BW -func (x Int16x32) MulHighMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32 // MulHighMasked multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHUW, CPU Feature: AVX512BW -func (x Uint16x8) MulHighMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MulHighMasked multiplies elements and stores the high part of the result, masked. 
// // Asm: VPMULHUW, CPU Feature: AVX512BW -func (x Uint16x16) MulHighMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MulHighMasked multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHUW, CPU Feature: AVX512BW -func (x Uint16x32) MulHighMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32 /* MulLow */ @@ -4627,79 +4627,79 @@ func (x Int64x8) MulLow(y Int64x8) Int64x8 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLW, CPU Feature: AVX512BW -func (x Int16x8) MulLowMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLW, CPU Feature: AVX512BW -func (x Int16x16) MulLowMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLW, CPU Feature: AVX512BW -func (x Int16x32) MulLowMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLD, CPU Feature: AVX512F -func (x Int32x4) MulLowMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLD, CPU Feature: AVX512F -func (x Int32x8) MulLowMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLD, CPU Feature: AVX512F -func (x Int32x16) MulLowMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLQ, CPU Feature: AVX512DQ -func (x Int64x2) MulLowMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLQ, CPU Feature: AVX512DQ -func (x Int64x4) MulLowMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4 // MulLowMasked multiplies elements and stores the low part of the result, masked. // // Asm: VPMULLQ, CPU Feature: AVX512DQ -func (x Int64x8) MulLowMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8 /* MulMasked */ // MulMasked multiplies corresponding elements of two vectors, masked. // // Asm: VMULPS, CPU Feature: AVX512F -func (x Float32x4) MulMasked(y Float32x4, z Mask32x4) Float32x4 +func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4 // MulMasked multiplies corresponding elements of two vectors, masked. // // Asm: VMULPS, CPU Feature: AVX512F -func (x Float32x8) MulMasked(y Float32x8, z Mask32x8) Float32x8 +func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8 // MulMasked multiplies corresponding elements of two vectors, masked. 
// // Asm: VMULPS, CPU Feature: AVX512F -func (x Float32x16) MulMasked(y Float32x16, z Mask32x16) Float32x16 +func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16 // MulMasked multiplies corresponding elements of two vectors, masked. // // Asm: VMULPD, CPU Feature: AVX512F -func (x Float64x2) MulMasked(y Float64x2, z Mask64x2) Float64x2 +func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2 // MulMasked multiplies corresponding elements of two vectors, masked. // // Asm: VMULPD, CPU Feature: AVX512F -func (x Float64x4) MulMasked(y Float64x4, z Mask64x4) Float64x4 +func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4 // MulMasked multiplies corresponding elements of two vectors, masked. // // Asm: VMULPD, CPU Feature: AVX512F -func (x Float64x8) MulMasked(y Float64x8, z Mask64x8) Float64x8 +func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8 /* NotEqual */ @@ -4858,152 +4858,152 @@ func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 // NotEqualMasked compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x4) NotEqualMasked(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x8) NotEqualMasked(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX512F -func (x Float32x16) NotEqualMasked(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x2) NotEqualMasked(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x4) NotEqualMasked(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // Asm: VCMPPD, CPU Feature: AVX512F -func (x Float64x8) NotEqualMasked(y Float64x8, z Mask64x8) Mask64x8 +func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x16) NotEqualMasked(y Int8x16, z Mask8x16) Mask8x16 +func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x32) NotEqualMasked(y Int8x32, z Mask8x32) Mask8x32 +func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW -func (x Int8x64) NotEqualMasked(y Int8x64, z Mask8x64) Mask8x64 +func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x8) NotEqualMasked(y Int16x8, z Mask16x8) Mask16x8 +func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x16) NotEqualMasked(y Int16x16, z Mask16x16) Mask16x16 +func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. 
// // Asm: VPCMPW, CPU Feature: AVX512BW -func (x Int16x32) NotEqualMasked(y Int16x32, z Mask16x32) Mask16x32 +func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x4) NotEqualMasked(y Int32x4, z Mask32x4) Mask32x4 +func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x8) NotEqualMasked(y Int32x8, z Mask32x8) Mask32x8 +func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F -func (x Int32x16) NotEqualMasked(y Int32x16, z Mask32x16) Mask32x16 +func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x2) NotEqualMasked(y Int64x2, z Mask64x2) Mask64x2 +func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x4) NotEqualMasked(y Int64x4, z Mask64x4) Mask64x4 +func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F -func (x Int64x8) NotEqualMasked(y Int64x8, z Mask64x8) Mask64x8 +func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x16) NotEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 +func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x32) NotEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 +func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW -func (x Uint8x64) NotEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 +func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x8) NotEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 +func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x16) NotEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 +func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW -func (x Uint16x32) NotEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 +func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x4) NotEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 +func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x8) NotEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 +func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F -func (x Uint32x16) NotEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 +func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. 
// // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x2) NotEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 +func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x4) NotEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 +func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // Asm: VPCMPUQ, CPU Feature: AVX512F -func (x Uint64x8) NotEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 +func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* Or */ @@ -5112,62 +5112,62 @@ func (x Uint64x8) Or(y Uint64x8) Uint64x8 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Int32x4) OrMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Int32x8) OrMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Int32x16) OrMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F -func (x Int64x2) OrMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F -func (x Int64x4) OrMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F -func (x Int64x8) OrMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Uint32x4) OrMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Uint32x8) OrMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F -func (x Uint32x16) OrMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F -func (x Uint64x2) OrMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2 // OrMasked performs a masked bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F -func (x Uint64x4) OrMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4 // OrMasked performs a masked bitwise OR operation between two vectors. 
// // Asm: VPORQ, CPU Feature: AVX512F -func (x Uint64x8) OrMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* PairDotProd */ @@ -5211,17 +5211,17 @@ func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI -func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4 +func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI -func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8 +func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI -func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16 +func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 /* PairDotProdMasked */ @@ -5229,19 +5229,19 @@ func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x1 // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX512BW -func (x Int16x8) PairDotProdMasked(y Int16x8, z Mask16x8) Int32x4 +func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4 // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX512BW -func (x Int16x16) PairDotProdMasked(y Int16x16, z Mask16x16) Int32x8 +func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8 // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX512BW -func (x Int16x32) PairDotProdMasked(y Int16x32, z Mask16x32) Int32x16 +func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16 /* PairwiseAdd */ @@ -5811,7 +5811,7 @@ func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16 +func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5819,7 +5819,7 @@ func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16 // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8x16 +func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5827,7 +5827,7 @@ func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32 +func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5835,7 +5835,7 @@ func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8x32 +func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5843,7 +5843,7 @@ func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64 +func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5851,7 +5851,7 @@ func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8x64 +func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5859,7 +5859,7 @@ func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8 +func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5867,7 +5867,7 @@ func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8 // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint16x8 +func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5875,7 +5875,7 @@ func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint1 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int16x16 +func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5883,7 +5883,7 @@ func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) Uint16x16 +func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5891,7 +5891,7 @@ func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) U // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int16x32 +func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5899,7 +5899,7 @@ func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW -func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) Uint16x32 +func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5907,7 +5907,7 @@ func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) U // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F -func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Float32x4 +func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5915,7 +5915,7 @@ func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Flo // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4 +func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5923,7 +5923,7 @@ func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint32x4 +func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5931,7 +5931,7 @@ func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint3 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F -func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Float32x8 +func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5939,7 +5939,7 @@ func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Flo // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8 +func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5947,7 +5947,7 @@ func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint32x8 +func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5955,7 +5955,7 @@ func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint3 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F -func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16) Float32x16 +func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5963,7 +5963,7 @@ func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16) // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int32x16 +func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5971,7 +5971,7 @@ func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F -func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) Uint32x16 +func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5979,7 +5979,7 @@ func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) U // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PD, CPU Feature: AVX512F -func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Float64x2 +func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5987,7 +5987,7 @@ func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Flo // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2 +func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -5995,7 +5995,7 @@ func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint64x2 +func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6003,7 +6003,7 @@ func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint6 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PD, CPU Feature: AVX512F -func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Float64x4 +func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6011,7 +6011,7 @@ func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Flo // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4 +func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6019,7 +6019,7 @@ func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint64x4 +func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6027,7 +6027,7 @@ func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint6 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PD, CPU Feature: AVX512F -func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Float64x8 +func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6035,7 +6035,7 @@ func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Flo // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8 +func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -6043,7 +6043,7 @@ func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8 // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F -func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint64x8 +func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8 /* PermuteMasked */ @@ -6052,168 +6052,168 @@ func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint6 // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Int8x16 +func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
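Continuing the same illustrative file, a sketch of a Permute2Masked call: each selected result lane is drawn from the concatenation of x and y at the position given by indices, per the doc comment above.

    // pickFromPair selects lanes from the concatenation {x, y} by indices,
    // under mask. Hypothetical helper for illustration only.
    func pickFromPair(x, y simd.Int64x4, indices simd.Uint64x4, mask simd.Mask64x4) simd.Int64x4 {
        // Only the bits of indices needed to address the combined lanes are used.
        return x.Permute2Masked(y, indices, mask)
    }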
// // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Int8x32 +func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Int8x64 +func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW -func (x Int16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Int16x8 +func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW -func (x Uint16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW -func (x Int16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Int16x16 +func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW -func (x Uint16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
// // Asm: VPERMW, CPU Feature: AVX512BW -func (x Int16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Int16x32 +func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW -func (x Uint16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPS, CPU Feature: AVX512F -func (x Float32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Float32x8 +func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F -func (x Int32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Int32x8 +func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F -func (x Uint32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPS, CPU Feature: AVX512F -func (x Float32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Float32x16 +func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F -func (x Int32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Int32x16 +func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F -func (x Uint32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
// // Asm: VPERMPD, CPU Feature: AVX512F -func (x Float64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Float64x4 +func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F -func (x Int64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Int64x4 +func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F -func (x Uint64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPD, CPU Feature: AVX512F -func (x Float64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Float64x8 +func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F -func (x Int64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Int64x8 +func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8 // PermuteMasked performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F -func (x Uint64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8 /* PopCount */ @@ -6342,122 +6342,122 @@ func (x Uint64x8) PopCount() Uint64x8 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x16) PopCountMasked(y Mask8x16) Int8x16 +func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x32) PopCountMasked(y Mask8x32) Int8x32 +func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x64) PopCountMasked(y Mask8x64) Int8x64 +func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x8) PopCountMasked(y Mask16x8) Int16x8 +func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x16) PopCountMasked(y Mask16x16) Int16x16 +func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16 // PopCountMasked counts the number of set bits in each element. 
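Continuing the illustrative file, the single-vector PermuteMasked reads analogously; the only change in this CL is that the mask argument is now documented as mask rather than z.

    // reorderWhere permutes x's lanes according to indices, under mask.
    // Hypothetical helper for illustration only.
    func reorderWhere(x simd.Float64x4, indices simd.Uint64x4, mask simd.Mask64x4) simd.Float64x4 {
        return x.PermuteMasked(indices, mask)
    }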
// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x32) PopCountMasked(y Mask16x32) Int16x32 +func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x4) PopCountMasked(y Mask32x4) Int32x4 +func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x8) PopCountMasked(y Mask32x8) Int32x8 +func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x16) PopCountMasked(y Mask32x16) Int32x16 +func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x2) PopCountMasked(y Mask64x2) Int64x2 +func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x4) PopCountMasked(y Mask64x4) Int64x4 +func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x8) PopCountMasked(y Mask64x8) Int64x8 +func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x16) PopCountMasked(y Mask8x16) Uint8x16 +func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x32) PopCountMasked(y Mask8x32) Uint8x32 +func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x64) PopCountMasked(y Mask8x64) Uint8x64 +func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x8) PopCountMasked(y Mask16x8) Uint16x8 +func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x16) PopCountMasked(y Mask16x16) Uint16x16 +func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x32) PopCountMasked(y Mask16x32) Uint16x32 +func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x4) PopCountMasked(y Mask32x4) Uint32x4 +func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x8) PopCountMasked(y Mask32x8) Uint32x8 +func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8 // PopCountMasked counts the number of set bits in each element. 
// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x16) PopCountMasked(y Mask32x16) Uint32x16 +func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x2) PopCountMasked(y Mask64x2) Uint64x2 +func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x4) PopCountMasked(y Mask64x4) Uint64x4 +func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4 // PopCountMasked counts the number of set bits in each element. // // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8 +func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8 /* RotateAllLeft */ @@ -6552,84 +6552,84 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F -func (x Int32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Int32x4 +func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F -func (x Int32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Int32x8 +func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F -func (x Int32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Int32x16 +func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Int64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Int64x2 +func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Int64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Int64x4 +func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Int64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Int64x8 +func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
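Continuing the illustrative file, unary masked operations such as PopCountMasked now take a single parameter named mask.

    // popCountWhere counts the set bits in each lane of x, under mask.
    // Hypothetical helper for illustration only.
    func popCountWhere(x simd.Uint64x4, mask simd.Mask64x4) simd.Uint64x4 {
        return x.PopCountMasked(mask)
    }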
// // Asm: VPROLD, CPU Feature: AVX512F -func (x Uint32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Uint32x4 +func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F -func (x Uint32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Uint32x8 +func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F -func (x Uint32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Uint32x16 +func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Uint64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Uint64x2 +func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Uint64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Uint64x4 +func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F -func (x Uint64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Uint64x8 +func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8 /* RotateAllRight */ @@ -6724,84 +6724,84 @@ func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8 // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Int32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Int32x4 +func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Int32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Int32x8 +func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Int32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Int32x16 +func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPRORQ, CPU Feature: AVX512F -func (x Int64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Int64x2 +func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F -func (x Int64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Int64x4 +func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F -func (x Int64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Int64x8 +func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Uint32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Uint32x4 +func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Uint32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Uint32x8 +func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F -func (x Uint32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Uint32x16 +func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F -func (x Uint64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Uint64x2 +func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F -func (x Uint64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Uint64x4 +func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F -func (x Uint64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Uint64x8 +func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8 /* RotateLeft */ @@ -6870,62 +6870,62 @@ func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
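Continuing the illustrative file, a sketch of an immediate-count rotate; as the doc comments above state, shift must be a constant, and a non-constant value triggers a runtime panic.

    // rotateRight3Where rotates each selected lane of x right by 3 bits.
    // Hypothetical helper for illustration only.
    func rotateRight3Where(x simd.Uint32x8, mask simd.Mask32x8) simd.Uint32x8 {
        // 3 is a compile-time constant, as the immediate form requires.
        return x.RotateAllRightMasked(3, mask)
    }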
// // Asm: VPROLVD, CPU Feature: AVX512F -func (x Int32x4) RotateLeftMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVD, CPU Feature: AVX512F -func (x Int32x8) RotateLeftMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVD, CPU Feature: AVX512F -func (x Int32x16) RotateLeftMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Int64x2) RotateLeftMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Int64x4) RotateLeftMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Int64x8) RotateLeftMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVD, CPU Feature: AVX512F -func (x Uint32x4) RotateLeftMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVD, CPU Feature: AVX512F -func (x Uint32x8) RotateLeftMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVD, CPU Feature: AVX512F -func (x Uint32x16) RotateLeftMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Uint64x2) RotateLeftMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Uint64x4) RotateLeftMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
// // Asm: VPROLVQ, CPU Feature: AVX512F -func (x Uint64x8) RotateLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* RotateRight */ @@ -6994,62 +6994,62 @@ func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Int32x4) RotateRightMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Int32x8) RotateRightMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Int32x16) RotateRightMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Int64x2) RotateRightMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Int64x4) RotateRightMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Int64x8) RotateRightMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Uint32x4) RotateRightMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Uint32x8) RotateRightMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVD, CPU Feature: AVX512F -func (x Uint32x16) RotateRightMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Uint64x2) RotateRightMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
// // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Uint64x4) RotateRightMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // // Asm: VPRORVQ, CPU Feature: AVX512F -func (x Uint64x8) RotateRightMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* Round */ @@ -7124,42 +7124,42 @@ func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8 // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x4) RoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4 +func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // RoundWithPrecisionMasked rounds elements with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x8) RoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8 +func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // RoundWithPrecisionMasked rounds elements with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F -func (x Float32x16) RoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16 +func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // RoundWithPrecisionMasked rounds elements with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x2) RoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2 +func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // RoundWithPrecisionMasked rounds elements with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x4) RoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4 +func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // RoundWithPrecisionMasked rounds elements with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F -func (x Float64x8) RoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8 +func (x Float64x8) RoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* SaturatedAdd */ @@ -7228,62 +7228,62 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Int8x16) SaturatedAddMasked(y Int8x16, z Mask8x16) Int8x16 +func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Int8x32) SaturatedAddMasked(y Int8x32, z Mask8x32) Int8x32 +func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. 
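Continuing the illustrative file, RoundWithPrecisionMasked follows the same immediate pattern; prec must likewise be a constant.

    // roundWhere rounds each selected lane of x with precision 2, per the
    // doc comment above. Hypothetical helper for illustration only.
    func roundWhere(x simd.Float64x4, mask simd.Mask64x4) simd.Float64x4 {
        return x.RoundWithPrecisionMasked(2, mask)
    }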
// // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Int8x64) SaturatedAddMasked(y Int8x64, z Mask8x64) Int8x64 +func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Int16x8) SaturatedAddMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Int16x16) SaturatedAddMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Int16x32) SaturatedAddMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Uint8x16) SaturatedAddMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Uint8x32) SaturatedAddMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSB, CPU Feature: AVX512BW -func (x Uint8x64) SaturatedAddMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Uint16x8) SaturatedAddMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Uint16x16) SaturatedAddMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // // Asm: VPADDSW, CPU Feature: AVX512BW -func (x Uint16x32) SaturatedAddMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32 /* SaturatedPairDotProdAccumulate */ @@ -7307,17 +7307,17 @@ func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x1 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4 +func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. 
// // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8 +func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16 +func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 /* SaturatedPairwiseAdd */ @@ -7414,62 +7414,62 @@ func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Int8x16) SaturatedSubMasked(y Int8x16, z Mask8x16) Int8x16 +func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Int8x32) SaturatedSubMasked(y Int8x32, z Mask8x32) Int8x32 +func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Int8x64) SaturatedSubMasked(y Int8x64, z Mask8x64) Int8x64 +func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Int16x8) SaturatedSubMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Int16x16) SaturatedSubMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Int16x32) SaturatedSubMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Uint8x16) SaturatedSubMasked(y Uint8x16, z Mask8x16) Uint8x16 +func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Uint8x32) SaturatedSubMasked(y Uint8x32, z Mask8x32) Uint8x32 +func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSB, CPU Feature: AVX512BW -func (x Uint8x64) SaturatedSubMasked(y Uint8x64, z Mask8x64) Uint8x64 +func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Uint16x8) SaturatedSubMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. 
// // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Uint16x16) SaturatedSubMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // // Asm: VPSUBSW, CPU Feature: AVX512BW -func (x Uint16x32) SaturatedSubMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32 /* SaturatedUnsignedSignedPairDotProd */ @@ -7497,19 +7497,19 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32 // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX512BW -func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, z Mask16x8) Int16x8 +func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX512BW -func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, z Mask16x16) Int16x16 +func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX512BW -func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, z Mask16x32) Int16x32 +func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32 /* SaturatedUnsignedSignedQuadDotProdAccumulate */ @@ -7548,32 +7548,32 @@ func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z In // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4 +func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8 +func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16 +func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
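// Sketch of the widening pair dot product declared above: a Uint8x16 receiver takes a
// Mask16x8 because the result has half as many lanes at twice the width. Hypothetical
// helper; the mask is supplied by the caller.
func examplePairDotProdMasked(u simd.Uint8x16, s simd.Int8x16, m simd.Mask16x8) simd.Int16x8 {
	// Each 16-bit result lane is the saturated sum u[2i]*s[2i] + u[2i+1]*s[2i+1],
	// computed in the lanes selected by m.
	return u.SaturatedUnsignedSignedPairDotProdMasked(s, m)
}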
// // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4 +func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8 +func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 +func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16 /* Set128 */ @@ -7951,7 +7951,7 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8 // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7959,7 +7959,7 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7967,7 +7967,7 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7975,7 +7975,7 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
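// Sketch of the accumulate shape of the VPDPBUSDS-backed methods above: each selected
// 32-bit lane of the receiver gains a saturated dot product of four unsigned bytes of y
// with four signed bytes of z. Hypothetical helper; the mask comes from the caller.
func exampleQuadDotProdAccumulateMasked(acc simd.Int32x4, y simd.Uint8x16, z simd.Int8x16, m simd.Mask32x4) simd.Int32x4 {
	return acc.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y, z, m)
}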
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7983,7 +7983,7 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7991,7 +7991,7 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -7999,7 +7999,7 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8007,7 +8007,7 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8015,7 +8015,7 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8023,7 +8023,7 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8031,7 +8031,7 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8039,7 +8039,7 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8047,7 +8047,7 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8055,7 +8055,7 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
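// Because shift becomes an immediate operand, it must be a constant at the call site; a
// non-constant value panics at runtime, as the comments above state. Hypothetical helper
// showing a constant shift of 3 with the vacated low bits filled from y.
func exampleShiftAllLeftFillMasked(x, y simd.Uint16x8, m simd.Mask16x8) simd.Uint16x8 {
	return x.ShiftAllLeftAndFillUpperFromMasked(3, y, m) // per selected lane: x<<3, low 3 bits taken from y's top bits
}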
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8063,7 +8063,7 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8071,7 +8071,7 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8079,7 +8079,7 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. @@ -8087,99 +8087,99 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftAllLeftMasked */ // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8 +func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
// // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16 +func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32 +func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F -func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4 +func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F -func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8 +func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F -func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16 +func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Int64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Int64x2 +func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4 +func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8 +func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW -func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
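// Unlike the immediate forms, ShiftAllLeftMasked takes the uniform shift count as an
// ordinary uint64, so it need not be a constant. Hypothetical helper with a
// caller-supplied mask.
func exampleShiftAllLeftMasked(x simd.Int32x8, n uint64, m simd.Mask32x8) simd.Int32x8 {
	return x.ShiftAllLeftMasked(n, m) // selected lanes shifted left by n; emptied low bits are zero
}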
// // Asm: VPSLLD, CPU Feature: AVX512F -func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F -func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F -func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Uint64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Uint64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F -func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8 /* ShiftAllRight */ @@ -8427,7 +8427,7 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8435,7 +8435,7 @@ func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8443,7 +8443,7 @@ func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8451,7 +8451,7 @@ func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8459,7 +8459,7 @@ func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8467,7 +8467,7 @@ func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8475,7 +8475,7 @@ func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8483,7 +8483,7 @@ func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8491,7 +8491,7 @@ func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8499,7 +8499,7 @@ func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z M // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8507,7 +8507,7 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8515,7 +8515,7 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8523,7 +8523,7 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8531,7 +8531,7 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8539,7 +8539,7 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8547,7 +8547,7 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8555,7 +8555,7 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. @@ -8563,99 +8563,99 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
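// The right-shift form mirrors the left one but fills from the other end: the emptied
// upper bits of each selected lane come from the low bits of y. Hypothetical helper with
// a constant shift of 8.
func exampleShiftAllRightFillMasked(x, y simd.Uint64x2, m simd.Mask64x2) simd.Uint64x2 {
	return x.ShiftAllRightAndFillUpperFromMasked(8, y, m) // per selected lane: x>>8 with y's low 8 bits in the top
}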
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftAllRightMasked */ // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX512BW -func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8 +func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX512BW -func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16 +func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX512BW -func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32 +func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX512F -func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4 +func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX512F -func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8 +func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX512F -func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16 +func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAQ, CPU Feature: AVX512F -func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 +func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAQ, CPU Feature: AVX512F -func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 +func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAQ, CPU Feature: AVX512F -func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 +func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX512BW -func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. 
Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX512BW -func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX512BW -func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX512F -func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX512F -func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX512F -func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX512F -func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX512F -func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX512F -func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8 /* ShiftLeft */ @@ -8865,201 +8865,201 @@ func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8 +func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
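// Sketch of the per-element variant described above: shift counts come from the matching
// lanes of y (only their low 5 bits are used) and the vacated low bits are filled from z.
// Hypothetical helper; the mask is supplied by the caller.
func exampleShiftLeftFillMasked(x, y, z simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
	return x.ShiftLeftAndFillUpperFromMasked(y, z, m) // selected lane i: x[i]<<y[i], filled from z[i]'s top bits
}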
// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16 +func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32 +func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4 +func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8 +func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16 +func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2 +func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4 +func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8 +func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 +func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 +func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 +func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 +func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 +func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 +func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 +func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 +func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 +func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 /* ShiftLeftMasked */ // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Int16x8) ShiftLeftMasked(y Int16x8, z Mask16x8) Int16x8 +func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Int16x16) ShiftLeftMasked(y Int16x16, z Mask16x16) Int16x16 +func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Int16x32) ShiftLeftMasked(y Int16x32, z Mask16x32) Int16x32 +func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Int32x4) ShiftLeftMasked(y Int32x4, z Mask32x4) Int32x4 +func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Int32x8) ShiftLeftMasked(y Int32x8, z Mask32x8) Int32x8 +func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Int32x16) ShiftLeftMasked(y Int32x16, z Mask32x16) Int32x16 +func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
// // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Int64x2) ShiftLeftMasked(y Int64x2, z Mask64x2) Int64x2 +func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Int64x4) ShiftLeftMasked(y Int64x4, z Mask64x4) Int64x4 +func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Int64x8) ShiftLeftMasked(y Int64x8, z Mask64x8) Int64x8 +func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Uint16x8) ShiftLeftMasked(y Uint16x8, z Mask16x8) Uint16x8 +func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Uint16x16) ShiftLeftMasked(y Uint16x16, z Mask16x16) Uint16x16 +func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVW, CPU Feature: AVX512BW -func (x Uint16x32) ShiftLeftMasked(y Uint16x32, z Mask16x32) Uint16x32 +func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Uint32x4) ShiftLeftMasked(y Uint32x4, z Mask32x4) Uint32x4 +func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Uint32x8) ShiftLeftMasked(y Uint32x8, z Mask32x8) Uint32x8 +func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVD, CPU Feature: AVX512F -func (x Uint32x16) ShiftLeftMasked(y Uint32x16, z Mask32x16) Uint32x16 +func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Uint64x2) ShiftLeftMasked(y Uint64x2, z Mask64x2) Uint64x2 +func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
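// ShiftLeftMasked is the plain per-element shift: counts come from y's lanes and the
// vacated bits are zeroed. Hypothetical helper with a caller-supplied mask.
func exampleShiftLeftMasked(x, counts simd.Uint16x8, m simd.Mask16x8) simd.Uint16x8 {
	return x.ShiftLeftMasked(counts, m) // selected lane i: x[i] << counts[i]
}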
// // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Uint64x4) ShiftLeftMasked(y Uint64x4, z Mask64x4) Uint64x4 +func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // // Asm: VPSLLVQ, CPU Feature: AVX512F -func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 +func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftRight */ @@ -9269,201 +9269,201 @@ func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8 +func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16 +func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32 +func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4 +func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8 +func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
+func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
+func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
+func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
+func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8

/* ShiftRightMasked */

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftRightMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftRightMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftRightMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftRightMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftRightMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftRightMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftRightMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8

/* Sign */

@@ -9540,32 +9540,32 @@ func (x Float64x8) Sqrt() Float64x8
// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x4) SqrtMasked(y Mask32x4) Float32x4
+func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4

// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x8) SqrtMasked(y Mask32x8) Float32x8
+func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8

// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x16) SqrtMasked(y Mask32x16) Float32x16
+func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16

// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x2) SqrtMasked(y Mask64x2) Float64x2
+func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2

// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x4) SqrtMasked(y Mask64x4) Float64x4
+func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4

// SqrtMasked computes the square root of each element.
//
// Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x8) SqrtMasked(y Mask64x8) Float64x8
+func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8

/* Sub */

@@ -9724,152 +9724,152 @@ func (x Uint64x8) Sub(y Uint64x8) Uint64x8
// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x4) SubMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x8) SubMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x16) SubMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x2) SubMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x4) SubMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x8) SubMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x16) SubMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x32) SubMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x64) SubMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x8) SubMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x16) SubMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x32) SubMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x4) SubMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x8) SubMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x16) SubMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x2) SubMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x4) SubMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x8) SubMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x16) SubMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x32) SubMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x64) SubMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x8) SubMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x16) SubMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x32) SubMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x4) SubMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x8) SubMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x16) SubMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x2) SubMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x4) SubMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4

// SubMasked subtracts corresponding elements of two vectors.
//
// Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x8) SubMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8

/* Trunc */

@@ -9944,42 +9944,42 @@ func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) TruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) TruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) TruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16

// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) TruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) TruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// TruncWithPrecisionMasked truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) TruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8

/* UnsignedSignedQuadDotProdAccumulate */

@@ -10018,32 +10018,32 @@ func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Ui
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16

/* Xor */

@@ -10152,62 +10152,62 @@ func (x Uint64x8) Xor(y Uint64x8) Uint64x8
// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x4) XorMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x8) XorMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x16) XorMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x2) XorMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x4) XorMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x8) XorMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x4) XorMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x8) XorMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x16) XorMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x2) XorMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x4) XorMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4

// XorMasked performs a masked bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x8) XorMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8

// Float64x2 converts from Float32x4 to Float64x2
func (from Float32x4) AsFloat64x2() (to Float64x2)
-- 
2.52.0
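For reviewers who want a caller-side view of the rename, here is a minimal sketch; it is illustrative only and not part of this CL. It assumes a toolchain built with GOEXPERIMENT=simd and uses only the ShiftLeftMasked signature that appears in this file; how the operand vectors and the mask are produced (loads, comparisons) is left out, and the package and function names in the sketch are placeholders.

package example

import "simd"

// shiftSelected shifts each lane of x left by the count in the matching lane
// of counts, under the control of mask, by calling the method renamed in this
// CL: func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2.
func shiftSelected(x, counts simd.Int64x2, mask simd.Mask64x2) simd.Int64x2 {
	return x.ShiftLeftMasked(counts, mask)
}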