[dev.simd] simd: default mask param's name to mask

author Junyang Shao <shaojunyang@google.com>

Mon, 14 Jul 2025 19:44:57 +0000 (19:44 +0000)

committer Junyang Shao <shaojunyang@google.com>

Tue, 15 Jul 2025 21:54:09 +0000 (14:54 -0700)
author Junyang Shao <shaojunyang@google.com>
Mon, 14 Jul 2025 19:44:57 +0000 (19:44 +0000)
committer Junyang Shao <shaojunyang@google.com>
Tue, 15 Jul 2025 21:54:09 +0000 (14:54 -0700)
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go

index 29899f8cb13a9c0e2f473523eda134641e2f80f9..ebb626358f8a244a655b488a4af36308da12d31b 100644 (file)
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -71,62 +71,62 @@ func (x Int64x8) Absolute() Int64x8
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x16) AbsoluteMasked(y Mask8x16) Int8x16
+func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x32) AbsoluteMasked(y Mask8x32) Int8x32
+func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x64) AbsoluteMasked(y Mask8x64) Int8x64
+func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x8) AbsoluteMasked(y Mask16x8) Int16x8
+func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x16) AbsoluteMasked(y Mask16x16) Int16x16
+func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x32) AbsoluteMasked(y Mask16x32) Int16x32
+func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x4) AbsoluteMasked(y Mask32x4) Int32x4
+func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x8) AbsoluteMasked(y Mask32x8) Int32x8
+func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x16) AbsoluteMasked(y Mask32x16) Int32x16
+func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x2) AbsoluteMasked(y Mask64x2) Int64x2
+func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x4) AbsoluteMasked(y Mask64x4) Int64x4
+func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4
  
  // AbsoluteMasked computes the absolute value of each element.
  //
  // Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x8) AbsoluteMasked(y Mask64x8) Int64x8
+func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8
  
  /* Add */
  
@@ -285,152 +285,152 @@ func (x Uint64x8) Add(y Uint64x8) Uint64x8
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x4) AddMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x8) AddMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPS, CPU Feature: AVX512F
-func (x Float32x16) AddMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x2) AddMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x4) AddMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VADDPD, CPU Feature: AVX512F
-func (x Float64x8) AddMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x16) AddMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x32) AddMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Int8x64) AddMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x8) AddMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x16) AddMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Int16x32) AddMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x4) AddMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x8) AddMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Int32x16) AddMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x2) AddMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x4) AddMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Int64x8) AddMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x16) AddMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x32) AddMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDB, CPU Feature: AVX512BW
-func (x Uint8x64) AddMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x8) AddMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x16) AddMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDW, CPU Feature: AVX512BW
-func (x Uint16x32) AddMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x4) AddMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x8) AddMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDD, CPU Feature: AVX512F
-func (x Uint32x16) AddMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x2) AddMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x4) AddMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // AddMasked adds corresponding elements of two vectors.
  //
  // Asm: VPADDQ, CPU Feature: AVX512F
-func (x Uint64x8) AddMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* AddSub */
  
@@ -561,62 +561,62 @@ func (x Uint64x8) And(y Uint64x8) Uint64x8
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x4) AndMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x8) AndMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Int32x16) AndMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x2) AndMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x4) AndMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Int64x8) AndMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x4) AndMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x8) AndMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDD, CPU Feature: AVX512F
-func (x Uint32x16) AndMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x2) AndMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x4) AndMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // AndMasked performs a masked bitwise AND operation between two vectors.
  //
  // Asm: VPANDQ, CPU Feature: AVX512F
-func (x Uint64x8) AndMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* AndNot */
  
@@ -725,62 +725,62 @@ func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x4) AndNotMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x8) AndNotMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Int32x16) AndNotMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x2) AndNotMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x4) AndNotMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Int64x8) AndNotMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x4) AndNotMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x8) AndNotMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDND, CPU Feature: AVX512F
-func (x Uint32x16) AndNotMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x2) AndNotMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x4) AndNotMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
  //
  // Asm: VPANDNQ, CPU Feature: AVX512F
-func (x Uint64x8) AndNotMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* ApproximateReciprocal */
  
@@ -819,32 +819,32 @@ func (x Float64x8) ApproximateReciprocal() Float64x8
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalMasked(y Mask32x4) Float32x4
+func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4
  
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalMasked(y Mask32x8) Float32x8
+func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8
  
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalMasked(y Mask32x16) Float32x16
+func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16
  
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalMasked(y Mask64x2) Float64x2
+func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2
  
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalMasked(y Mask64x4) Float64x4
+func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4
  
  // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
  //
  // Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalMasked(y Mask64x8) Float64x8
+func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8
  
  /* ApproximateReciprocalOfSqrt */
  
@@ -883,32 +883,32 @@ func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalOfSqrtMasked(y Mask32x4) Float32x4
+func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4
  
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalOfSqrtMasked(y Mask32x8) Float32x8
+func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8
  
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalOfSqrtMasked(y Mask32x16) Float32x16
+func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16
  
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalOfSqrtMasked(y Mask64x2) Float64x2
+func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2
  
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalOfSqrtMasked(y Mask64x4) Float64x4
+func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4
  
  // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
  //
  // Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalOfSqrtMasked(y Mask64x8) Float64x8
+func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8
  
  /* Average */
  
@@ -947,32 +947,32 @@ func (x Uint16x32) Average(y Uint16x32) Uint16x32
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x16) AverageMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x32) AverageMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGB, CPU Feature: AVX512BW
-func (x Uint8x64) AverageMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x8) AverageMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x16) AverageMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // AverageMasked computes the rounded average of corresponding elements.
  //
  // Asm: VPAVGW, CPU Feature: AVX512BW
-func (x Uint16x32) AverageMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  /* Ceil */
  
@@ -1047,42 +1047,42 @@ func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) CeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) CeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) CeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) CeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) CeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) CeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* DiffWithCeilWithPrecision */
  
@@ -1135,42 +1135,42 @@ func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* DiffWithFloorWithPrecision */
  
@@ -1223,42 +1223,42 @@ func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* DiffWithRoundWithPrecision */
  
@@ -1311,42 +1311,42 @@ func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* DiffWithTruncWithPrecision */
  
@@ -1399,42 +1399,42 @@ func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* Div */
  
@@ -1473,32 +1473,32 @@ func (x Float64x8) Div(y Float64x8) Float64x8
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x4) DivMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x8) DivMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x16) DivMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x2) DivMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x4) DivMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // DivMasked divides elements of two vectors.
  //
  // Asm: VDIVPD, CPU Feature: AVX512F
-func (x Float64x8) DivMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8
  
  /* DotProdBroadcast */
  
@@ -1674,152 +1674,152 @@ func (x Uint64x8) Equal(y Uint64x8) Mask64x8
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) EqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) EqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) EqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) EqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) EqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) EqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) EqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) EqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) EqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) EqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) EqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) EqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) EqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) EqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) EqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) EqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) EqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) EqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) EqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) EqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) EqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) EqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) EqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) EqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) EqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) EqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) EqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) EqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) EqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // EqualMasked compares for equality, masked.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) EqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* Floor */
  
@@ -1894,42 +1894,42 @@ func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) FloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) FloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) FloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) FloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) FloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) FloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* FusedMultiplyAdd */
  
@@ -1968,32 +1968,32 @@ func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
  
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
  
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
  
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
  
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
  
  // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
  //
  // Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
  
  /* FusedMultiplyAddSub */
  
@@ -2032,32 +2032,32 @@ func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
  
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
  
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
  
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
  
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
  
  // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
  //
  // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
  
  /* FusedMultiplySubAdd */
  
@@ -2096,32 +2096,32 @@ func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
+func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
  
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
+func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
  
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
+func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
  
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
+func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
  
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
+func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
  
  // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
  //
  // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
+func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
  
  /* GaloisFieldAffineTransform */
  
@@ -2283,19 +2283,19 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  //
  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  //
  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  //
  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  /* Get128 */
  
@@ -2736,304 +2736,304 @@ func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) GreaterEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) GreaterEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) GreaterEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) GreaterEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) GreaterEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) GreaterEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) GreaterEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) GreaterEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) GreaterEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) GreaterEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) GreaterEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) GreaterEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) GreaterEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) GreaterEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) GreaterEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) GreaterEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) GreaterEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) GreaterEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) GreaterEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) GreaterEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) GreaterEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) GreaterEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) GreaterEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) GreaterEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) GreaterEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) GreaterEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) GreaterEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) GreaterEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) GreaterEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // GreaterEqualMasked compares for greater than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) GreaterEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* GreaterMasked */
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) GreaterMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) GreaterMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) GreaterMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) GreaterMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) GreaterMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) GreaterMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) GreaterMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) GreaterMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) GreaterMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) GreaterMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) GreaterMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) GreaterMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) GreaterMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) GreaterMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) GreaterMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) GreaterMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) GreaterMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) GreaterMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) GreaterMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) GreaterMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) GreaterMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) GreaterMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) GreaterMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) GreaterMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) GreaterMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) GreaterMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) GreaterMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) GreaterMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) GreaterMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // GreaterMasked compares for greater than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) GreaterMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* IsNan */
  
@@ -3072,32 +3072,32 @@ func (x Float64x8) IsNan(y Float64x8) Mask64x8
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) IsNanMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) IsNanMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) IsNanMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) IsNanMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) IsNanMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) IsNanMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  /* Less */
  
@@ -3408,304 +3408,304 @@ func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) LessEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) LessEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) LessEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) LessEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) LessEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) LessEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) LessEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) LessEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) LessEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) LessEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) LessEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) LessEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) LessEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) LessEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) LessEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) LessEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) LessEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) LessEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) LessEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) LessEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) LessEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) LessEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) LessEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) LessEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) LessEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) LessEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) LessEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) LessEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) LessEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // LessEqualMasked compares for less than or equal.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) LessEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* LessMasked */
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) LessMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) LessMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) LessMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) LessMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) LessMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // LessMasked compares for less than.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) LessMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) LessMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) LessMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) LessMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) LessMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) LessMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) LessMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) LessMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) LessMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) LessMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) LessMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) LessMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) LessMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) LessMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) LessMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) LessMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) LessMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) LessMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) LessMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) LessMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) LessMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) LessMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) LessMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) LessMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // LessMasked compares for less than.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) LessMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* Max */
  
@@ -3864,152 +3864,152 @@ func (x Uint64x8) Max(y Uint64x8) Uint64x8
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x4) MaxMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x8) MaxMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPS, CPU Feature: AVX512F
-func (x Float32x16) MaxMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x2) MaxMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x4) MaxMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VMAXPD, CPU Feature: AVX512F
-func (x Float64x8) MaxMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x16) MaxMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x32) MaxMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSB, CPU Feature: AVX512BW
-func (x Int8x64) MaxMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x8) MaxMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x16) MaxMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSW, CPU Feature: AVX512BW
-func (x Int16x32) MaxMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x4) MaxMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x8) MaxMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSD, CPU Feature: AVX512F
-func (x Int32x16) MaxMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x2) MaxMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x4) MaxMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXSQ, CPU Feature: AVX512F
-func (x Int64x8) MaxMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x16) MaxMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x32) MaxMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUB, CPU Feature: AVX512BW
-func (x Uint8x64) MaxMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x8) MaxMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x16) MaxMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUW, CPU Feature: AVX512BW
-func (x Uint16x32) MaxMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x4) MaxMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x8) MaxMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUD, CPU Feature: AVX512F
-func (x Uint32x16) MaxMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x2) MaxMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x4) MaxMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // MaxMasked computes the maximum of corresponding elements.
  //
  // Asm: VPMAXUQ, CPU Feature: AVX512F
-func (x Uint64x8) MaxMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* Min */
  
@@ -4168,152 +4168,152 @@ func (x Uint64x8) Min(y Uint64x8) Uint64x8
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x4) MinMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x8) MinMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPS, CPU Feature: AVX512F
-func (x Float32x16) MinMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x2) MinMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x4) MinMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VMINPD, CPU Feature: AVX512F
-func (x Float64x8) MinMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x16) MinMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x32) MinMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSB, CPU Feature: AVX512BW
-func (x Int8x64) MinMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x8) MinMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x16) MinMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSW, CPU Feature: AVX512BW
-func (x Int16x32) MinMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x4) MinMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x8) MinMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSD, CPU Feature: AVX512F
-func (x Int32x16) MinMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x2) MinMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x4) MinMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINSQ, CPU Feature: AVX512F
-func (x Int64x8) MinMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x16) MinMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x32) MinMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUB, CPU Feature: AVX512BW
-func (x Uint8x64) MinMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x8) MinMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x16) MinMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUW, CPU Feature: AVX512BW
-func (x Uint16x32) MinMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x4) MinMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x8) MinMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUD, CPU Feature: AVX512F
-func (x Uint32x16) MinMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x2) MinMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x4) MinMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // MinMasked computes the minimum of corresponding elements.
  //
  // Asm: VPMINUQ, CPU Feature: AVX512F
-func (x Uint64x8) MinMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* Mul */
  
@@ -4384,32 +4384,32 @@ func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x4) MulByPowOf2Masked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4
  
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x8) MulByPowOf2Masked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8
  
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x16) MulByPowOf2Masked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16
  
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x2) MulByPowOf2Masked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2
  
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x4) MulByPowOf2Masked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4
  
  // MulByPowOf2Masked multiplies elements by a power of 2.
  //
  // Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x8) MulByPowOf2Masked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8
  
  /* MulEvenWiden */
  
@@ -4479,37 +4479,37 @@ func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x2) MulEvenWidenMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x4) MulEvenWidenMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x8) MulEvenWidenMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
  // Result[i] = v1.Even[i] * v2.Even[i].
  //
  // Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* MulHigh */
  
@@ -4548,32 +4548,32 @@ func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x8) MulHighMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x16) MulHighMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x32) MulHighMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x8) MulHighMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x16) MulHighMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // MulHighMasked multiplies elements and stores the high part of the result, masked.
  //
  // Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x32) MulHighMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  /* MulLow */
  
@@ -4627,79 +4627,79 @@ func (x Int64x8) MulLow(y Int64x8) Int64x8
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x8) MulLowMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x16) MulLowMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x32) MulLowMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x4) MulLowMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x8) MulLowMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x16) MulLowMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x2) MulLowMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x4) MulLowMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // MulLowMasked multiplies elements and stores the low part of the result, masked.
  //
  // Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x8) MulLowMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8
  
  /* MulMasked */
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x4) MulMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x8) MulMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x16) MulMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x2) MulMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x4) MulMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // MulMasked multiplies corresponding elements of two vectors, masked.
  //
  // Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x8) MulMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
  
  /* NotEqual */
  
@@ -4858,152 +4858,152 @@ func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x4) NotEqualMasked(y Float32x4, z Mask32x4) Mask32x4
+func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x8) NotEqualMasked(y Float32x8, z Mask32x8) Mask32x8
+func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPS, CPU Feature: AVX512F
-func (x Float32x16) NotEqualMasked(y Float32x16, z Mask32x16) Mask32x16
+func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x2) NotEqualMasked(y Float64x2, z Mask64x2) Mask64x2
+func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x4) NotEqualMasked(y Float64x4, z Mask64x4) Mask64x4
+func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VCMPPD, CPU Feature: AVX512F
-func (x Float64x8) NotEqualMasked(y Float64x8, z Mask64x8) Mask64x8
+func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x16) NotEqualMasked(y Int8x16, z Mask8x16) Mask8x16
+func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) NotEqualMasked(y Int8x32, z Mask8x32) Mask8x32
+func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) NotEqualMasked(y Int8x64, z Mask8x64) Mask8x64
+func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) NotEqualMasked(y Int16x8, z Mask16x8) Mask16x8
+func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) NotEqualMasked(y Int16x16, z Mask16x16) Mask16x16
+func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) NotEqualMasked(y Int16x32, z Mask16x32) Mask16x32
+func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) NotEqualMasked(y Int32x4, z Mask32x4) Mask32x4
+func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) NotEqualMasked(y Int32x8, z Mask32x8) Mask32x8
+func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) NotEqualMasked(y Int32x16, z Mask32x16) Mask32x16
+func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) NotEqualMasked(y Int64x2, z Mask64x2) Mask64x2
+func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) NotEqualMasked(y Int64x4, z Mask64x4) Mask64x4
+func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) NotEqualMasked(y Int64x8, z Mask64x8) Mask64x8
+func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) NotEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
+func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) NotEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
+func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) NotEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
+func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) NotEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
+func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) NotEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
+func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) NotEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
+func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) NotEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
+func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) NotEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
+func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) NotEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
+func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) NotEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
+func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) NotEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
+func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
  
  // NotEqualMasked compares for inequality.
  //
  // Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) NotEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
+func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
  
  /* Or */
  
@@ -5112,62 +5112,62 @@ func (x Uint64x8) Or(y Uint64x8) Uint64x8
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x4) OrMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x8) OrMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Int32x16) OrMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x2) OrMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x4) OrMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Int64x8) OrMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x4) OrMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x8) OrMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORD, CPU Feature: AVX512F
-func (x Uint32x16) OrMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x2) OrMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x4) OrMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // OrMasked performs a masked bitwise OR operation between two vectors.
  //
  // Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x8) OrMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* PairDotProd */
  
@@ -5211,17 +5211,17 @@ func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
  // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
+func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
  
  // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
+func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
  
  // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
+func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
  
  /* PairDotProdMasked */
  
@@ -5229,19 +5229,19 @@ func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x1
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x8) PairDotProdMasked(y Int16x8, z Mask16x8) Int32x4
+func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4
  
  // PairDotProdMasked multiplies the elements and adds the pairs together,
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x16) PairDotProdMasked(y Int16x16, z Mask16x16) Int32x8
+func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8
  
  // PairDotProdMasked multiplies the elements and adds the pairs together,
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProdMasked(y Int16x32, z Mask16x32) Int32x16
+func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16
  
  /* PairwiseAdd */
  
@@ -5811,7 +5811,7 @@ func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16
+func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5819,7 +5819,7 @@ func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8x16
+func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5827,7 +5827,7 @@ func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32
+func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5835,7 +5835,7 @@ func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8x32
+func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5843,7 +5843,7 @@ func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64
+func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5851,7 +5851,7 @@ func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8x64
+func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5859,7 +5859,7 @@ func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8
+func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5867,7 +5867,7 @@ func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5875,7 +5875,7 @@ func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint1
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int16x16
+func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5883,7 +5883,7 @@ func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5891,7 +5891,7 @@ func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) U
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int16x32
+func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5899,7 +5899,7 @@ func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2W, CPU Feature: AVX512BW
-func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5907,7 +5907,7 @@ func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) U
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Float32x4
+func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5915,7 +5915,7 @@ func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Flo
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4
+func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5923,7 +5923,7 @@ func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5931,7 +5931,7 @@ func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint3
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Float32x8
+func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5939,7 +5939,7 @@ func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Flo
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8
+func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5947,7 +5947,7 @@ func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5955,7 +5955,7 @@ func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint3
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PS, CPU Feature: AVX512F
-func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16) Float32x16
+func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5963,7 +5963,7 @@ func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16)
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int32x16
+func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5971,7 +5971,7 @@ func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2D, CPU Feature: AVX512F
-func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5979,7 +5979,7 @@ func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) U
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Float64x2
+func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5987,7 +5987,7 @@ func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Flo
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2
+func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -5995,7 +5995,7 @@ func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6003,7 +6003,7 @@ func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint6
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Float64x4
+func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6011,7 +6011,7 @@ func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Flo
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4
+func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6019,7 +6019,7 @@ func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6027,7 +6027,7 @@ func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint6
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2PD, CPU Feature: AVX512F
-func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Float64x8
+func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6035,7 +6035,7 @@ func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Flo
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8
+func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8
  
  // Permute2Masked performs a full permutation of vector x, y using indices:
  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
@@ -6043,7 +6043,7 @@ func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8
  // Only the needed bits to represent xy's index are used in indices' elements.
  //
  // Asm: VPERMI2Q, CPU Feature: AVX512F
-func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8
  
  /* PermuteMasked */
  
@@ -6052,168 +6052,168 @@ func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint6
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Int8x16
+func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Int8x32
+func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Int8x64
+func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Int16x8
+func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Int16x16
+func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Int16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Int16x32
+func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMW, CPU Feature: AVX512BW
-func (x Uint16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMPS, CPU Feature: AVX512F
-func (x Float32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Float32x8
+func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMD, CPU Feature: AVX512F
-func (x Int32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Int32x8
+func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMD, CPU Feature: AVX512F
-func (x Uint32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMPS, CPU Feature: AVX512F
-func (x Float32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Float32x16
+func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMD, CPU Feature: AVX512F
-func (x Int32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Int32x16
+func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMD, CPU Feature: AVX512F
-func (x Uint32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Float64x4
+func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Int64x4
+func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Float64x8
+func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Int64x8
+func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
  
  // PermuteMasked performs a full permutation of vector x using indices:
  // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
  // Only the needed bits to represent x's index are used in indices' elements.
  //
  // Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
  
  /* PopCount */
  
@@ -6342,122 +6342,122 @@ func (x Uint64x8) PopCount() Uint64x8
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) PopCountMasked(y Mask8x16) Int8x16
+func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) PopCountMasked(y Mask8x32) Int8x32
+func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) PopCountMasked(y Mask8x64) Int8x64
+func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) PopCountMasked(y Mask16x8) Int16x8
+func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) PopCountMasked(y Mask16x16) Int16x16
+func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) PopCountMasked(y Mask16x32) Int16x32
+func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) PopCountMasked(y Mask32x4) Int32x4
+func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) PopCountMasked(y Mask32x8) Int32x8
+func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) PopCountMasked(y Mask32x16) Int32x16
+func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) PopCountMasked(y Mask64x2) Int64x2
+func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) PopCountMasked(y Mask64x4) Int64x4
+func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) PopCountMasked(y Mask64x8) Int64x8
+func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) PopCountMasked(y Mask8x16) Uint8x16
+func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) PopCountMasked(y Mask8x32) Uint8x32
+func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) PopCountMasked(y Mask8x64) Uint8x64
+func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) PopCountMasked(y Mask16x8) Uint16x8
+func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) PopCountMasked(y Mask16x16) Uint16x16
+func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) PopCountMasked(y Mask16x32) Uint16x32
+func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) PopCountMasked(y Mask32x4) Uint32x4
+func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) PopCountMasked(y Mask32x8) Uint32x8
+func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) PopCountMasked(y Mask32x16) Uint32x16
+func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) PopCountMasked(y Mask64x2) Uint64x2
+func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) PopCountMasked(y Mask64x4) Uint64x4
+func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4
  
  // PopCountMasked counts the number of set bits in each element.
  //
  // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8
+func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8
  
  /* RotateAllLeft */
  
@@ -6552,84 +6552,84 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Int32x4
+func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Int32x8
+func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Int32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Int32x16
+func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Int64x2
+func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Int64x4
+func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Int64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Int64x8
+func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Uint32x4
+func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Uint32x8
+func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLD, CPU Feature: AVX512F
-func (x Uint32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4
  
  // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPROLQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8
  
  /* RotateAllRight */
  
@@ -6724,84 +6724,84 @@ func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Int32x4
+func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Int32x8
+func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Int32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Int32x16
+func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Int64x2
+func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Int64x4
+func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Int64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Int64x8
+func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Uint32x4
+func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Uint32x8
+func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORD, CPU Feature: AVX512F
-func (x Uint32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4
  
  // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
  //
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPRORQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8
  
  /* RotateLeft */
  
@@ -6870,62 +6870,62 @@ func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x4) RotateLeftMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x8) RotateLeftMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Int32x16) RotateLeftMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x2) RotateLeftMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x4) RotateLeftMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Int64x8) RotateLeftMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x4) RotateLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x8) RotateLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVD, CPU Feature: AVX512F
-func (x Uint32x16) RotateLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPROLVQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* RotateRight */
  
@@ -6994,62 +6994,62 @@ func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x4) RotateRightMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x8) RotateRightMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Int32x16) RotateRightMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x2) RotateRightMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x4) RotateRightMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Int64x8) RotateRightMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x4) RotateRightMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x8) RotateRightMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x16) RotateRightMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateRightMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateRightMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
  //
  // Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateRightMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* Round */
  
@@ -7124,42 +7124,42 @@ func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // RoundWithPrecisionMasked rounds elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // RoundWithPrecisionMasked rounds elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // RoundWithPrecisionMasked rounds elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // RoundWithPrecisionMasked rounds elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // RoundWithPrecisionMasked rounds elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) RoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* SaturatedAdd */
  
@@ -7228,62 +7228,62 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedAddMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedAddMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedAddMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedAddMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedAddMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedAddMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedAddMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedAddMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedAddMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedAddMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedAddMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
  //
  // Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedAddMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  /* SaturatedPairDotProdAccumulate */
  
@@ -7307,17 +7307,17 @@ func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x1
  // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
+func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
  
  // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
+func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
  
  // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
+func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
  
  /* SaturatedPairwiseAdd */
  
@@ -7414,62 +7414,62 @@ func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedSubMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedSubMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedSubMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedSubMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedSubMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedSubMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedSubMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedSubMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedSubMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedSubMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedSubMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
  //
  // Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedSubMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  /* SaturatedUnsignedSignedPairDotProd */
  
@@ -7497,19 +7497,19 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, z Mask16x8) Int16x8
+func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8
  
  // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, z Mask16x16) Int16x16
+func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16
  
  // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
  // yielding a vector of half as many elements with twice the input element size.
  //
  // Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, z Mask16x32) Int16x32
+func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32
  
  /* SaturatedUnsignedSignedQuadDotProdAccumulate */
  
@@ -7548,32 +7548,32 @@ func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z In
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
  
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
  
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
  
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
  
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
  
  // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
  
  /* Set128 */
  
@@ -7951,7 +7951,7 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7959,7 +7959,7 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7967,7 +7967,7 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7975,7 +7975,7 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7983,7 +7983,7 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7991,7 +7991,7 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -7999,7 +7999,7 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8007,7 +8007,7 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8015,7 +8015,7 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8023,7 +8023,7 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Ma
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8031,7 +8031,7 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8039,7 +8039,7 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8047,7 +8047,7 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8055,7 +8055,7 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8063,7 +8063,7 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8071,7 +8071,7 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8079,7 +8079,7 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // immediate (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
@@ -8087,99 +8087,99 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
  
  /* ShiftAllLeftMasked */
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4
  
  // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8
  
  /* ShiftAllRight */
  
@@ -8427,7 +8427,7 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8435,7 +8435,7 @@ func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8443,7 +8443,7 @@ func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8451,7 +8451,7 @@ func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8459,7 +8459,7 @@ func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8467,7 +8467,7 @@ func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8475,7 +8475,7 @@ func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8483,7 +8483,7 @@ func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8491,7 +8491,7 @@ func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8499,7 +8499,7 @@ func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z M
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8507,7 +8507,7 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8515,7 +8515,7 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8523,7 +8523,7 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8531,7 +8531,7 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8539,7 +8539,7 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8547,7 +8547,7 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16,
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8555,7 +8555,7 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
@@ -8563,99 +8563,99 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z
  // shift is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
  
  /* ShiftAllRightMasked */
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8
+func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16
+func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32
+func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4
+func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8
+func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAD, CPU Feature: AVX512F
-func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16
+func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2
+func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4
+func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8
+func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4
  
  // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8
  
  /* ShiftLeft */
  
@@ -8865,201 +8865,201 @@ func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
+func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
+func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
+func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
+func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
+func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
+func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
+func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
+func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
+func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  //
  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
  
  /* ShiftLeftMasked */
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftLeftMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftLeftMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftLeftMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x4) ShiftLeftMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x8) ShiftLeftMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Int32x16) ShiftLeftMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftLeftMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftLeftMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftLeftMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftLeftMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftLeftMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftLeftMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
  //
  // Asm: VPSLLVQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* ShiftRight */
  
@@ -9269,201 +9269,201 @@ func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
+func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
+func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
+func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
+func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
+func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
+func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
+func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
+func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
+func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
  // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  //
  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
  
  /* ShiftRightMasked */
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVW, CPU Feature: AVX512BW
-func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVD, CPU Feature: AVX512F
-func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
  //
  // Asm: VPSRAVQ, CPU Feature: AVX512F
-func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x8) ShiftRightMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x16) ShiftRightMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVW, CPU Feature: AVX512BW
-func (x Uint16x32) ShiftRightMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x4) ShiftRightMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x8) ShiftRightMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVD, CPU Feature: AVX512F
-func (x Uint32x16) ShiftRightMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x2) ShiftRightMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
  //
  // Asm: VPSRLVQ, CPU Feature: AVX512F
-func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* Sign */
  
@@ -9540,32 +9540,32 @@ func (x Float64x8) Sqrt() Float64x8
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x4) SqrtMasked(y Mask32x4) Float32x4
+func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4
  
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x8) SqrtMasked(y Mask32x8) Float32x8
+func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8
  
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPS, CPU Feature: AVX512F
-func (x Float32x16) SqrtMasked(y Mask32x16) Float32x16
+func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16
  
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x2) SqrtMasked(y Mask64x2) Float64x2
+func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2
  
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x4) SqrtMasked(y Mask64x4) Float64x4
+func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4
  
  // SqrtMasked computes the square root of each element.
  //
  // Asm: VSQRTPD, CPU Feature: AVX512F
-func (x Float64x8) SqrtMasked(y Mask64x8) Float64x8
+func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8
  
  /* Sub */
  
@@ -9724,152 +9724,152 @@ func (x Uint64x8) Sub(y Uint64x8) Uint64x8
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x4) SubMasked(y Float32x4, z Mask32x4) Float32x4
+func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x8) SubMasked(y Float32x8, z Mask32x8) Float32x8
+func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPS, CPU Feature: AVX512F
-func (x Float32x16) SubMasked(y Float32x16, z Mask32x16) Float32x16
+func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x2) SubMasked(y Float64x2, z Mask64x2) Float64x2
+func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x4) SubMasked(y Float64x4, z Mask64x4) Float64x4
+func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VSUBPD, CPU Feature: AVX512F
-func (x Float64x8) SubMasked(y Float64x8, z Mask64x8) Float64x8
+func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x16) SubMasked(y Int8x16, z Mask8x16) Int8x16
+func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x32) SubMasked(y Int8x32, z Mask8x32) Int8x32
+func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Int8x64) SubMasked(y Int8x64, z Mask8x64) Int8x64
+func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x8) SubMasked(y Int16x8, z Mask16x8) Int16x8
+func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x16) SubMasked(y Int16x16, z Mask16x16) Int16x16
+func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Int16x32) SubMasked(y Int16x32, z Mask16x32) Int16x32
+func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x4) SubMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x8) SubMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Int32x16) SubMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x2) SubMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x4) SubMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Int64x8) SubMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x16) SubMasked(y Uint8x16, z Mask8x16) Uint8x16
+func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x32) SubMasked(y Uint8x32, z Mask8x32) Uint8x32
+func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBB, CPU Feature: AVX512BW
-func (x Uint8x64) SubMasked(y Uint8x64, z Mask8x64) Uint8x64
+func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x8) SubMasked(y Uint16x8, z Mask16x8) Uint16x8
+func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x16) SubMasked(y Uint16x16, z Mask16x16) Uint16x16
+func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBW, CPU Feature: AVX512BW
-func (x Uint16x32) SubMasked(y Uint16x32, z Mask16x32) Uint16x32
+func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x4) SubMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x8) SubMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBD, CPU Feature: AVX512F
-func (x Uint32x16) SubMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x2) SubMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x4) SubMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // SubMasked subtracts corresponding elements of two vectors.
  //
  // Asm: VPSUBQ, CPU Feature: AVX512F
-func (x Uint64x8) SubMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  /* Trunc */
  
@@ -9944,42 +9944,42 @@ func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) TruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
+func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
  
  // TruncWithPrecisionMasked truncates elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) TruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
+func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
  
  // TruncWithPrecisionMasked truncates elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) TruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
+func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
  
  // TruncWithPrecisionMasked truncates elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) TruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
+func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
  
  // TruncWithPrecisionMasked truncates elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) TruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
+func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
  
  // TruncWithPrecisionMasked truncates elements with specified precision.
  //
  // prec is expected to be a constant, non-constant value will trigger a runtime panic.
  //
  // Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) TruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
+func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
  
  /* UnsignedSignedQuadDotProdAccumulate */
  
@@ -10018,32 +10018,32 @@ func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Ui
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
  
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
  
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
  
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
  
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
  
  // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
  //
  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
  
  /* Xor */
  
@@ -10152,62 +10152,62 @@ func (x Uint64x8) Xor(y Uint64x8) Uint64x8
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x4) XorMasked(y Int32x4, z Mask32x4) Int32x4
+func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x8) XorMasked(y Int32x8, z Mask32x8) Int32x8
+func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Int32x16) XorMasked(y Int32x16, z Mask32x16) Int32x16
+func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x2) XorMasked(y Int64x2, z Mask64x2) Int64x2
+func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x4) XorMasked(y Int64x4, z Mask64x4) Int64x4
+func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Int64x8) XorMasked(y Int64x8, z Mask64x8) Int64x8
+func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x4) XorMasked(y Uint32x4, z Mask32x4) Uint32x4
+func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x8) XorMasked(y Uint32x8, z Mask32x8) Uint32x8
+func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORD, CPU Feature: AVX512F
-func (x Uint32x16) XorMasked(y Uint32x16, z Mask32x16) Uint32x16
+func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x2) XorMasked(y Uint64x2, z Mask64x2) Uint64x2
+func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x4) XorMasked(y Uint64x4, z Mask64x4) Uint64x4
+func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4
  
  // XorMasked performs a masked bitwise XOR operation between two vectors.
  //
  // Asm: VPXORQ, CPU Feature: AVX512F
-func (x Uint64x8) XorMasked(y Uint64x8, z Mask64x8) Uint64x8
+func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8
  
  // Float64x2 converts from Float32x4 to Float64x2
  func (from Float32x4) AsFloat64x2() (to Float64x2)
author	Junyang Shao <shaojunyang@google.com>
	Mon, 14 Jul 2025 19:44:57 +0000 (19:44 +0000)
committer	Junyang Shao <shaojunyang@google.com>
	Tue, 15 Jul 2025 21:54:09 +0000 (14:54 -0700)