From 03a3887f31264e778c9aaf62247a478eedd3633d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 16 Jul 2025 17:02:47 +0000 Subject: [PATCH] [dev.simd] simd: clean up masked op doc This CL is generated by CL 688395. Change-Id: I40c6a64c6002b28040e6af746481b4deb2049179 Reviewed-on: https://go-review.googlesource.com/c/go/+/688396 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/simd/ops_amd64.go | 1940 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 1786 insertions(+), 154 deletions(-) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 4624105d79..a5c2f2d5c2 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -70,61 +70,85 @@ func (x Int64x8) Absolute() Int64x8 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSB, CPU Feature: AVX512BW func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSB, CPU Feature: AVX512BW func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSB, CPU Feature: AVX512BW func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSW, CPU Feature: AVX512BW func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSW, CPU Feature: AVX512BW func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSW, CPU Feature: AVX512BW func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSD, CPU Feature: AVX512F func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSD, CPU Feature: AVX512F func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSD, CPU Feature: AVX512F func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSQ, CPU Feature: AVX512F func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSQ, CPU Feature: AVX512F func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4 // AbsoluteMasked computes the absolute value of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPABSQ, CPU Feature: AVX512F func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8 @@ -284,151 +308,211 @@ func (x Uint64x8) Add(y Uint64x8) Uint64x8 // AddMasked adds corresponding elements of two vectors. 
// +// This operation is applied selectively under a write mask. +// // Asm: VADDPS, CPU Feature: AVX512F func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VADDPS, CPU Feature: AVX512F func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VADDPS, CPU Feature: AVX512F func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VADDPD, CPU Feature: AVX512F func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VADDPD, CPU Feature: AVX512F func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VADDPD, CPU Feature: AVX512F func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPADDQ, CPU Feature: AVX512F func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDQ, CPU Feature: AVX512F func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDQ, CPU Feature: AVX512F func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDB, CPU Feature: AVX512BW func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDW, CPU Feature: AVX512BW func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDD, CPU Feature: AVX512F func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDQ, CPU Feature: AVX512F func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDQ, CPU Feature: AVX512F func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4 // AddMasked adds corresponding elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDQ, CPU Feature: AVX512F func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -486,7 +570,7 @@ func (x Int32x4) And(y Int32x4) Int32x4 // Asm: VPAND, CPU Feature: AVX2 func (x Int32x8) And(y Int32x8) Int32x8 -// And performs a masked bitwise AND operation between two vectors. 
+// And performs a bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F func (x Int32x16) And(y Int32x16) Int32x16 @@ -501,7 +585,7 @@ func (x Int64x2) And(y Int64x2) Int64x2 // Asm: VPAND, CPU Feature: AVX2 func (x Int64x4) And(y Int64x4) Int64x4 -// And performs a masked bitwise AND operation between two vectors. +// And performs a bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Int64x8) And(y Int64x8) Int64x8 @@ -536,7 +620,7 @@ func (x Uint32x4) And(y Uint32x4) Uint32x4 // Asm: VPAND, CPU Feature: AVX2 func (x Uint32x8) And(y Uint32x8) Uint32x8 -// And performs a masked bitwise AND operation between two vectors. +// And performs a bitwise AND operation between two vectors. // // Asm: VPANDD, CPU Feature: AVX512F func (x Uint32x16) And(y Uint32x16) Uint32x16 @@ -551,69 +635,93 @@ func (x Uint64x2) And(y Uint64x2) Uint64x2 // Asm: VPAND, CPU Feature: AVX2 func (x Uint64x4) And(y Uint64x4) Uint64x4 -// And performs a masked bitwise AND operation between two vectors. +// And performs a bitwise AND operation between two vectors. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Uint64x8) And(y Uint64x8) Uint64x8 /* AndMasked */ -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. 
+// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDD, CPU Feature: AVX512F func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4 -// AndMasked performs a masked bitwise AND operation between two vectors. +// AndMasked performs a bitwise AND operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPANDQ, CPU Feature: AVX512F func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -724,61 +832,85 @@ func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDND, CPU Feature: AVX512F func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4 // AndNotMasked performs a bitwise x &^ y. // +// This operation is applied selectively under a write mask. +// // Asm: VPANDNQ, CPU Feature: AVX512F func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -818,31 +950,43 @@ func (x Float64x8) ApproximateReciprocal() Float64x8 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PS, CPU Feature: AVX512F func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PS, CPU Feature: AVX512F func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PS, CPU Feature: AVX512F func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PD, CPU Feature: AVX512F func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PD, CPU Feature: AVX512F func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4 // ApproximateReciprocalMasked computes an approximate reciprocal of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRCP14PD, CPU Feature: AVX512F func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8 @@ -882,31 +1026,43 @@ func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRSQRT14PS, CPU Feature: AVX512F func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRSQRT14PS, CPU Feature: AVX512F func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRSQRT14PS, CPU Feature: AVX512F func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. 
+// // Asm: VRSQRT14PD, CPU Feature: AVX512F func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRSQRT14PD, CPU Feature: AVX512F func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. // +// This operation is applied selectively under a write mask. +// // Asm: VRSQRT14PD, CPU Feature: AVX512F func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8 @@ -946,31 +1102,43 @@ func (x Uint16x32) Average(y Uint16x32) Uint16x32 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGB, CPU Feature: AVX512BW func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGB, CPU Feature: AVX512BW func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGB, CPU Feature: AVX512BW func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGW, CPU Feature: AVX512BW func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGW, CPU Feature: AVX512BW func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16 // AverageMasked computes the rounded average of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPAVGW, CPU Feature: AVX512BW func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32 @@ -998,42 +1166,42 @@ func (x Float64x4) Ceil() Float64x4 /* CeilWithPrecision */ -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) CeilWithPrecision(prec uint8) Float32x4 -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) CeilWithPrecision(prec uint8) Float32x8 -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) CeilWithPrecision(prec uint8) Float32x16 -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. 
// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) CeilWithPrecision(prec uint8) Float64x2 -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) CeilWithPrecision(prec uint8) Float64x4 -// CeilWithPrecision rounds elements up with specified precision, masked. +// CeilWithPrecision rounds elements up with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // @@ -1042,42 +1210,54 @@ func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8 /* CeilWithPrecisionMasked */ -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 -// CeilWithPrecisionMasked rounds elements up with specified precision, masked. +// CeilWithPrecisionMasked rounds elements up with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// @@ -1314,6 +1494,8 @@ func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1321,6 +1503,8 @@ func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Fl // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1328,6 +1512,8 @@ func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Fl // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1335,6 +1521,8 @@ func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1342,6 +1530,8 @@ func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Fl // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1349,6 +1539,8 @@ func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Fl // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1402,6 +1594,8 @@ func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1409,6 +1603,8 @@ func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) F // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1416,6 +1612,8 @@ func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) F // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1423,6 +1621,8 @@ func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1430,6 +1630,8 @@ func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) F // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1437,6 +1639,8 @@ func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) F // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1490,6 +1694,8 @@ func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1497,6 +1703,8 @@ func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) F // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1504,6 +1712,8 @@ func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) F // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1511,6 +1721,8 @@ func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1518,6 +1730,8 @@ func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) F // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1525,6 +1739,8 @@ func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) F // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // +// This operation is applied selectively under a write mask. 
+// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1578,6 +1794,8 @@ func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1585,6 +1803,8 @@ func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) F // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1592,6 +1812,8 @@ func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) F // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ @@ -1599,6 +1821,8 @@ func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1606,6 +1830,8 @@ func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) F // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1613,6 +1839,8 @@ func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) F // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ @@ -1654,31 +1882,43 @@ func (x Float64x8) Div(y Float64x8) Float64x8 // DivMasked divides elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4 // DivMasked divides elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8 // DivMasked divides elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16 // DivMasked divides elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2 // DivMasked divides elements of two vectors. 
// +// This operation is applied selectively under a write mask. +// // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4 // DivMasked divides elements of two vectors. // +// This operation is applied selectively under a write mask. +// // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8 @@ -1791,7 +2031,7 @@ func (x Float32x4) Equal(y Float32x4) Mask32x4 // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Equal(y Float32x8) Mask32x8 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) Equal(y Float32x16) Mask32x16 @@ -1806,199 +2046,259 @@ func (x Float64x2) Equal(y Float64x2) Mask64x2 // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Equal(y Float64x4) Mask64x4 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) Equal(y Float64x8) Mask64x8 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) Equal(y Int8x64) Mask8x64 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) Equal(y Int16x32) Mask16x32 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) Equal(y Int32x16) Mask32x16 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) Equal(y Int64x8) Mask64x8 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) Equal(y Uint8x64) Mask8x64 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) Equal(y Uint16x32) Mask16x32 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) Equal(y Uint32x16) Mask32x16 -// Equal compares for equality, masked. +// Equal compares for equality. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) Equal(y Uint64x8) Mask64x8 /* EqualMasked */ -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. 
// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. 
+// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 -// EqualMasked compares for equality, masked. +// EqualMasked compares for equality. +// +// This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -2027,42 +2327,42 @@ func (x Float64x4) Floor() Float64x4 /* FloorWithPrecision */ -// FloorWithPrecision rounds elements down with specified precision, masked. +// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) FloorWithPrecision(prec uint8) Float32x4 -// FloorWithPrecision rounds elements down with specified precision, masked. 
+// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) FloorWithPrecision(prec uint8) Float32x8 -// FloorWithPrecision rounds elements down with specified precision, masked. +// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) FloorWithPrecision(prec uint8) Float32x16 -// FloorWithPrecision rounds elements down with specified precision, masked. +// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) FloorWithPrecision(prec uint8) Float64x2 -// FloorWithPrecision rounds elements down with specified precision, masked. +// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) FloorWithPrecision(prec uint8) Float64x4 -// FloorWithPrecision rounds elements down with specified precision, masked. +// FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // @@ -2071,42 +2371,54 @@ func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8 /* FloorWithPrecisionMasked */ -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. +// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. +// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. +// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. +// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. 
+// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 -// FloorWithPrecisionMasked rounds elements down with specified precision, masked. +// FloorWithPrecisionMasked rounds elements down with specified precision. +// +// This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // @@ -2149,31 +2461,43 @@ func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddMasked performs (x * y) + z. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 @@ -2213,31 +2537,43 @@ func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
// +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 @@ -2277,31 +2613,43 @@ func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // +// This operation is applied selectively under a write mask. +// // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 @@ -2380,6 +2728,8 @@ func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x6 // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI @@ -2391,6 +2741,8 @@ func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, m // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI @@ -2402,6 +2754,8 @@ func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, m // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI @@ -2414,6 +2768,8 @@ func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, m // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI @@ -2424,6 +2780,8 @@ func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI @@ -2434,6 +2792,8 @@ func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // +// This operation is applied selectively under a write mask. +// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI @@ -2464,18 +2824,24 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // +// This operation is applied selectively under a write mask. +// // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // +// This operation is applied selectively under a write mask. +// // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // +// This operation is applied selectively under a write mask. +// // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 @@ -2917,151 +3283,211 @@ func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask.
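// Aside, illustrative only: the reduction polynomial x^8 + x^4 + x^3 + x + 1
// used by GaloisFieldMulMasked above is the AES field polynomial, so the
// masked byte-wise product is the building block of MixColumns-style
// arithmetic. gfScale is a hypothetical helper over the experimental simd
// package.
func gfScale(state, coeff simd.Uint8x16, m simd.Mask8x16) simd.Uint8x16 {
	// GF(2^8) product per selected byte lane (VGF2P8MULB under a write mask).
	return state.GaloisFieldMulMasked(coeff, m)
}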
+// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. 
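// Aside, illustrative only: masked compares return a Mask, so they compose
// directly: a lane of the result is set only where the input mask was set
// and the predicate held. geUnder is a hypothetical helper over the
// experimental simd package.
func geUnder(x, y simd.Int32x8, m simd.Mask32x8) simd.Mask32x8 {
	// VPCMPD under a write mask; lanes outside m report false.
	return x.GreaterEqualMasked(y, m)
}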
// +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -3069,151 +3495,211 @@ func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. 
+// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4 // GreaterMasked compares for greater than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -3253,31 +3739,43 @@ func (x Float64x8) IsNan(y Float64x8) Mask64x8 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8 // IsNanMasked checks if elements are NaN. 
Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8 @@ -3589,151 +4087,211 @@ func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // LessEqualMasked compares for less than or equal. 
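// Aside, illustrative only: per the comment above, the NaN test is spelled
// x.IsNan(x); NaN is the one value that compares unordered with itself.
// nanLanes is a hypothetical helper over the experimental simd package.
func nanLanes(x simd.Float64x4) simd.Mask64x4 {
	return x.IsNan(x) // true exactly in the lanes holding NaN
}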
// +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // LessEqualMasked compares for less than or equal. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -3741,151 +4299,211 @@ func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2 // LessMasked compares for less than. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4 // LessMasked compares for less than. 
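// Aside, illustrative only: because a masked compare can only assert lanes
// already set in its input mask, chaining two of them computes a per-lane
// conjunction, here lo <= x && x < hi. inRange is a hypothetical helper
// over the experimental simd package.
func inRange(x, lo, hi simd.Int32x8, m simd.Mask32x8) simd.Mask32x8 {
	ge := x.GreaterEqualMasked(lo, m) // lanes of m where x >= lo
	return x.LessMasked(hi, ge)       // of those, the lanes where x < hi
}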
// +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -4045,151 +4663,211 @@ func (x Uint64x8) Max(y Uint64x8) Uint64x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPS, CPU Feature: AVX512F func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPS, CPU Feature: AVX512F func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPS, CPU Feature: AVX512F func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPD, CPU Feature: AVX512F func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPD, CPU Feature: AVX512F func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMAXPD, CPU Feature: AVX512F func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSB, CPU Feature: AVX512BW func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSB, CPU Feature: AVX512BW func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSB, CPU Feature: AVX512BW func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSW, CPU Feature: AVX512BW func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSW, CPU Feature: AVX512BW func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSW, CPU Feature: AVX512BW func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSD, CPU Feature: AVX512F func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPMAXSD, CPU Feature: AVX512F func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSD, CPU Feature: AVX512F func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSQ, CPU Feature: AVX512F func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSQ, CPU Feature: AVX512F func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXSQ, CPU Feature: AVX512F func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUB, CPU Feature: AVX512BW func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUB, CPU Feature: AVX512BW func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUB, CPU Feature: AVX512BW func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUW, CPU Feature: AVX512BW func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUW, CPU Feature: AVX512BW func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUW, CPU Feature: AVX512BW func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUD, CPU Feature: AVX512F func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUD, CPU Feature: AVX512F func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUD, CPU Feature: AVX512F func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUQ, CPU Feature: AVX512F func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPMAXUQ, CPU Feature: AVX512F func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MaxMasked computes the maximum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMAXUQ, CPU Feature: AVX512F func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -4349,151 +5027,211 @@ func (x Uint64x8) Min(y Uint64x8) Uint64x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPS, CPU Feature: AVX512F func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPS, CPU Feature: AVX512F func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPS, CPU Feature: AVX512F func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPD, CPU Feature: AVX512F func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPD, CPU Feature: AVX512F func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VMINPD, CPU Feature: AVX512F func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSB, CPU Feature: AVX512BW func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSB, CPU Feature: AVX512BW func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSB, CPU Feature: AVX512BW func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSW, CPU Feature: AVX512BW func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSW, CPU Feature: AVX512BW func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSW, CPU Feature: AVX512BW func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSD, CPU Feature: AVX512F func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4 // MinMasked computes the minimum of corresponding elements. 
// +// This operation is applied selectively under a write mask. +// // Asm: VPMINSD, CPU Feature: AVX512F func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSD, CPU Feature: AVX512F func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSQ, CPU Feature: AVX512F func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSQ, CPU Feature: AVX512F func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINSQ, CPU Feature: AVX512F func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUB, CPU Feature: AVX512BW func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUB, CPU Feature: AVX512BW func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUB, CPU Feature: AVX512BW func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUW, CPU Feature: AVX512BW func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUW, CPU Feature: AVX512BW func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUW, CPU Feature: AVX512BW func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUD, CPU Feature: AVX512F func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUD, CPU Feature: AVX512F func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUD, CPU Feature: AVX512F func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUQ, CPU Feature: AVX512F func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. 
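// Aside, illustrative only: MaxMasked and MinMasked compose into a per-lane
// clamp of the selected lanes into [lo, hi]; what unselected lanes hold is
// governed by the write-mask semantics noted above. clampLanes is a
// hypothetical helper over the experimental simd package.
func clampLanes(x, lo, hi simd.Float32x16, m simd.Mask32x16) simd.Float32x16 {
	// Raise to at least lo, then cap at hi (VMAXPS then VMINPS, masked).
	return x.MaxMasked(lo, m).MinMasked(hi, m)
}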
+// // Asm: VPMINUQ, CPU Feature: AVX512F func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MinMasked computes the minimum of corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPMINUQ, CPU Feature: AVX512F func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -4509,7 +5247,7 @@ func (x Float32x4) Mul(y Float32x4) Float32x4 // Asm: VMULPS, CPU Feature: AVX func (x Float32x8) Mul(y Float32x8) Float32x8 -// Mul multiplies corresponding elements of two vectors, masked. +// Mul multiplies corresponding elements of two vectors. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x16) Mul(y Float32x16) Float32x16 @@ -4524,7 +5262,7 @@ func (x Float64x2) Mul(y Float64x2) Float64x2 // Asm: VMULPD, CPU Feature: AVX func (x Float64x4) Mul(y Float64x4) Float64x4 -// Mul multiplies corresponding elements of two vectors, masked. +// Mul multiplies corresponding elements of two vectors. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x8) Mul(y Float64x8) Float64x8 @@ -4565,31 +5303,43 @@ func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPS, CPU Feature: AVX512F func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPS, CPU Feature: AVX512F func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPS, CPU Feature: AVX512F func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPD, CPU Feature: AVX512F func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPD, CPU Feature: AVX512F func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4 // MulByPowOf2Masked multiplies elements by a power of 2. // +// This operation is applied selectively under a write mask. +// // Asm: VSCALEFPD, CPU Feature: AVX512F func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8 @@ -4607,19 +5357,19 @@ func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2 // Asm: VPMULDQ, CPU Feature: AVX2 func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. 
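// Aside, illustrative only: MulEvenWiden avoids overflow by consuming only
// the even-indexed lanes and producing full-width products, per the
// Result[i] = v1.Even[i] * v2.Even[i] formula above. mulEven is a
// hypothetical helper over the experimental simd package.
func mulEven(x, y simd.Uint32x4) simd.Uint64x2 {
	return x.MulEvenWiden(y) // VPMULUDQ: 64-bit products of lanes 0 and 2
}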
// // Asm: VPMULDQ, CPU Feature: AVX512F @@ -4637,19 +5387,19 @@ func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2 // Asm: VPMULUDQ, CPU Feature: AVX2 func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4 -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F @@ -4657,39 +5407,51 @@ func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 /* MulEvenWidenMasked */ -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. +// // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2 -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. +// // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4 -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. +// // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8 -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. +// // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2 -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. +// // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4 -// MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. +// MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -4705,7 +5467,7 @@ func (x Int16x8) MulHigh(y Int16x8) Int16x8 // Asm: VPMULHW, CPU Feature: AVX2 func (x Int16x16) MulHigh(y Int16x16) Int16x16 -// MulHigh multiplies elements and stores the high part of the result, masked. +// MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x32) MulHigh(y Int16x32) Int16x32 @@ -4720,39 +5482,51 @@ func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 // Asm: VPMULHUW, CPU Feature: AVX2 func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 -// MulHigh multiplies elements and stores the high part of the result, masked. +// MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 /* MulHighMasked */ -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8 -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16 -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32 -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8 -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16 -// MulHighMasked multiplies elements and stores the high part of the result, masked. +// MulHighMasked multiplies elements and stores the high part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32 @@ -4769,7 +5543,7 @@ func (x Int16x8) MulLow(y Int16x8) Int16x8 // Asm: VPMULLW, CPU Feature: AVX2 func (x Int16x16) MulLow(y Int16x16) Int16x16 -// MulLow multiplies elements and stores the low part of the result, masked. +// MulLow multiplies elements and stores the low part of the result. 
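// Aside, illustrative only: MulHigh and MulLow are the two halves of one
// multiplication; together they recover the exact 32-bit product of each
// pair of 16-bit lanes. fullProduct16 is a hypothetical helper over the
// experimental simd package.
func fullProduct16(x, y simd.Int16x16) (hi, lo simd.Int16x16) {
	return x.MulHigh(y), x.MulLow(y) // VPMULHW and VPMULLW on the same inputs
}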
// // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x32) MulLow(y Int16x32) Int16x32 @@ -4784,101 +5558,131 @@ func (x Int32x4) MulLow(y Int32x4) Int32x4 // Asm: VPMULLD, CPU Feature: AVX2 func (x Int32x8) MulLow(y Int32x8) Int32x8 -// MulLow multiplies elements and stores the low part of the result, masked. +// MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x16) MulLow(y Int32x16) Int32x16 -// MulLow multiplies elements and stores the low part of the result, masked. +// MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x2) MulLow(y Int64x2) Int64x2 -// MulLow multiplies elements and stores the low part of the result, masked. +// MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x4) MulLow(y Int64x4) Int64x4 -// MulLow multiplies elements and stores the low part of the result, masked. +// MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x8) MulLow(y Int64x8) Int64x8 /* MulLowMasked */ -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. 
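// Aside, illustrative only: a masked low multiply scales just the selected
// lanes, e.g. bumping chosen 64-bit counters while leaving the rest to the
// write-mask semantics above. scaleWhere is a hypothetical helper over the
// experimental simd package.
func scaleWhere(x, factors simd.Int64x4, m simd.Mask64x4) simd.Int64x4 {
	return x.MulLowMasked(factors, m) // VPMULLQ under a write mask
}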
// // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4 -// MulLowMasked multiplies elements and stores the low part of the result, masked. +// MulLowMasked multiplies elements and stores the low part of the result. +// +// This operation is applied selectively under a write mask. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8 /* MulMasked */ -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4 -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8 -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16 -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2 -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4 -// MulMasked multiplies corresponding elements of two vectors, masked. +// MulMasked multiplies corresponding elements of two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8 @@ -5039,151 +5843,211 @@ func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. 
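The same kind of sketch for the float forms (hypothetical helper, assumed zeroing semantics as above):

package example

import "simd"

// mulWhere multiplies element-wise, producing results only in the lanes
// selected by m.
func mulWhere(x, y simd.Float64x4, m simd.Mask64x4) simd.Float64x4 {
	return x.MulMasked(y, m) // VMULPD under write mask (AVX512F)
}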
+// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // +// This operation is applied selectively under a write mask. +// // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 @@ -5219,7 +6083,7 @@ func (x Int32x4) Or(y Int32x4) Int32x4 // Asm: VPOR, CPU Feature: AVX2 func (x Int32x8) Or(y Int32x8) Int32x8 -// Or performs a masked bitwise OR operation between two vectors. +// Or performs a bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x16) Or(y Int32x16) Int32x16 @@ -5234,7 +6098,7 @@ func (x Int64x2) Or(y Int64x2) Int64x2 // Asm: VPOR, CPU Feature: AVX2 func (x Int64x4) Or(y Int64x4) Int64x4 -// Or performs a masked bitwise OR operation between two vectors. +// Or performs a bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x8) Or(y Int64x8) Int64x8 @@ -5269,7 +6133,7 @@ func (x Uint32x4) Or(y Uint32x4) Uint32x4 // Asm: VPOR, CPU Feature: AVX2 func (x Uint32x8) Or(y Uint32x8) Uint32x8 -// Or performs a masked bitwise OR operation between two vectors. +// Or performs a bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x16) Or(y Uint32x16) Uint32x16 @@ -5284,69 +6148,93 @@ func (x Uint64x2) Or(y Uint64x2) Uint64x2 // Asm: VPOR, CPU Feature: AVX2 func (x Uint64x4) Or(y Uint64x4) Uint64x4 -// Or performs a masked bitwise OR operation between two vectors. 
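The masked compares return a mask rather than a vector, so the incoming mask effectively intersects with the comparison result (an assumption about the semantics; the doc only says the compare is applied selectively). A sketch with a hypothetical helper:

package example

import "simd"

// differsWhere reports, as a mask, the lanes where x != y among those
// already selected by m.
func differsWhere(x, y simd.Uint32x8, m simd.Mask32x8) simd.Mask32x8 {
	return x.NotEqualMasked(y, m) // VPCMPUD under write mask (AVX512F)
}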
+// Or performs a bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x8) Or(y Uint64x8) Uint64x8 /* OrMasked */ -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. 
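A corresponding sketch for the bitwise forms (hypothetical helper, assumed zeroing):

package example

import "simd"

// orWhere ORs x and y lane-wise in the lanes selected by m.
func orWhere(x, y simd.Int64x2, m simd.Mask64x2) simd.Int64x2 {
	return x.OrMasked(y, m) // VPORQ under write mask (AVX512F)
}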
// // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4 -// OrMasked performs a masked bitwise OR operation between two vectors. +// OrMasked performs a bitwise OR operation between two vectors. +// +// This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -5392,16 +6280,22 @@ func (x Int16x32) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16 // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x8) PairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4 // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x16) PairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8 // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x32) PairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16 @@ -5410,18 +6304,24 @@ func (x Int16x32) PairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask3 // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask. +// // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4 // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask. +// // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8 // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask. +// // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16 @@ -5992,6 +6892,8 @@ func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16 @@ -6000,6 +6902,8 @@ func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16 @@ -6008,6 +6912,8 @@ func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements.
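Note that for the pair-dot-product forms the mask is sized for the 16-bit inputs, not the narrower 32-bit results, as the signatures above show. A sketch (hypothetical helper):

package example

import "simd"

// pairDotWhere multiplies 16-bit lanes of x and y and adds adjacent
// pairs into 32-bit lanes; m selects among the 16-bit input lanes.
func pairDotWhere(x, y simd.Int16x16, m simd.Mask16x16) simd.Int32x8 {
	return x.PairDotProdMasked(y, m) // VPMADDWD under write mask (AVX512BW)
}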
// +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32 @@ -6016,6 +6922,8 @@ func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32 @@ -6024,6 +6932,8 @@ func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64 @@ -6032,6 +6942,8 @@ func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64 @@ -6040,6 +6952,8 @@ func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8 @@ -6048,6 +6962,8 @@ func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int1 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8 @@ -6056,6 +6972,8 @@ func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16 @@ -6064,6 +6982,8 @@ func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16 @@ -6072,6 +6992,8 @@ func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32 @@ -6080,6 +7002,8 @@ func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32 @@ -6088,6 +7012,8 @@ func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4 @@ -6096,6 +7022,8 @@ func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4 @@ -6104,6 +7032,8 @@ func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int3 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4 @@ -6112,6 +7042,8 @@ func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8 @@ -6120,6 +7052,8 @@ func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8 @@ -6128,6 +7062,8 @@ func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int3 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8 @@ -6136,6 +7072,8 @@ func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16 @@ -6144,6 +7082,8 @@ func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x // where xy is x appending y. 
// Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16 @@ -6152,6 +7092,8 @@ func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16 @@ -6160,6 +7102,8 @@ func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2 @@ -6168,6 +7112,8 @@ func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2 @@ -6176,6 +7122,8 @@ func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int6 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2 @@ -6184,6 +7132,8 @@ func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4 @@ -6192,6 +7142,8 @@ func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4 @@ -6200,6 +7152,8 @@ func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int6 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4 @@ -6208,6 +7162,8 @@ func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Ui // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8 @@ -6216,6 +7172,8 @@ func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8 @@ -6224,6 +7182,8 @@ func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int6 // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8 @@ -6233,6 +7193,8 @@ func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Ui // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16 @@ -6240,6 +7202,8 @@ func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16 @@ -6247,6 +7211,8 @@ func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32 @@ -6254,6 +7220,8 @@ func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32 @@ -6261,6 +7229,8 @@ func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64 @@ -6268,6 +7238,8 @@ func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. 
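A sketch of the two-vector permute under a mask (hypothetical helper; index truncation behaves as the doc comments above describe):

package example

import "simd"

// pick2Where gathers 64-bit lanes by index from the concatenation of
// x and y (16 lanes total), writing only the lanes selected by m.
func pick2Where(x, y simd.Int64x8, idx simd.Uint64x8, m simd.Mask64x8) simd.Int64x8 {
	return x.Permute2Masked(y, idx, m) // VPERMI2Q under write mask (AVX512F)
}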
+// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64 @@ -6275,6 +7247,8 @@ func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8 @@ -6282,6 +7256,8 @@ func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8 @@ -6289,6 +7265,8 @@ func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16 @@ -6296,6 +7274,8 @@ func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16 @@ -6303,6 +7283,8 @@ func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32 @@ -6310,6 +7292,8 @@ func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32 @@ -6317,6 +7301,8 @@ func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMPS, CPU Feature: AVX512F func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8 @@ -6324,6 +7310,8 @@ func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPERMD, CPU Feature: AVX512F func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8 @@ -6331,6 +7319,8 @@ func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMD, CPU Feature: AVX512F func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8 @@ -6338,6 +7328,8 @@ func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMPS, CPU Feature: AVX512F func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16 @@ -6345,6 +7337,8 @@ func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMD, CPU Feature: AVX512F func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16 @@ -6352,6 +7346,8 @@ func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMD, CPU Feature: AVX512F func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16 @@ -6359,6 +7355,8 @@ func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMPD, CPU Feature: AVX512F func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4 @@ -6366,6 +7364,8 @@ func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMQ, CPU Feature: AVX512F func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4 @@ -6373,6 +7373,8 @@ func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMQ, CPU Feature: AVX512F func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4 @@ -6380,6 +7382,8 @@ func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. 
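And the single-vector permute (hypothetical helper, assumed zeroing of deselected lanes):

package example

import "simd"

// shuffleWhere sets result lane i to x[idx[i]] for each lane selected by m.
func shuffleWhere(x simd.Int32x8, idx simd.Uint32x8, m simd.Mask32x8) simd.Int32x8 {
	return x.PermuteMasked(idx, m) // VPERMD under write mask (AVX512F)
}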
+// // Asm: VPERMPD, CPU Feature: AVX512F func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8 @@ -6387,6 +7391,8 @@ func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMQ, CPU Feature: AVX512F func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8 @@ -6394,6 +7400,8 @@ func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPERMQ, CPU Feature: AVX512F func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8 @@ -6523,121 +7531,169 @@ func (x Uint64x8) PopCount() Uint64x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTB, CPU Feature: AVX512BITALG func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTW, CPU Feature: AVX512BITALG func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4 // PopCountMasked counts the number of set bits in each element. // +// This operation is applied selectively under a write mask. +// // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8 @@ -6731,6 +7787,8 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
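A sketch for the unary masked forms, which take only the mask (hypothetical helper, same assumed zeroing semantics):

package example

import "simd"

// popcntWhere counts set bits per byte lane, only in lanes selected by m.
func popcntWhere(x simd.Uint8x64, m simd.Mask8x64) simd.Uint8x64 {
	return x.PopCountMasked(m) // VPOPCNTB under write mask (AVX512BITALG)
}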
// // Asm: VPROLD, CPU Feature: AVX512F @@ -6738,6 +7796,8 @@ func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F @@ -6745,6 +7805,8 @@ func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F @@ -6752,6 +7814,8 @@ func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6759,6 +7823,8 @@ func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6766,6 +7832,8 @@ func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6773,6 +7841,8 @@ func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F @@ -6780,6 +7850,8 @@ func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F @@ -6787,6 +7859,8 @@ func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLD, CPU Feature: AVX512F @@ -6794,6 +7868,8 @@ func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. 
+// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6801,6 +7877,8 @@ func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6808,6 +7886,8 @@ func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPROLQ, CPU Feature: AVX512F @@ -6903,6 +7983,8 @@ func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6910,6 +7992,8 @@ func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6917,6 +8001,8 @@ func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6924,6 +8010,8 @@ func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -6931,6 +8019,8 @@ func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -6938,6 +8028,8 @@ func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -6945,6 +8037,8 @@ func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. 
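The "All" rotates take an immediate, so the shift must be a compile-time constant; per the docs above, a non-constant value triggers a runtime panic. A sketch (hypothetical helper):

package example

import "simd"

// rol3Where rotates each 32-bit lane left by the constant 3 in the
// lanes selected by m.
func rol3Where(x simd.Uint32x4, m simd.Mask32x4) simd.Uint32x4 {
	return x.RotateAllLeftMasked(3, m) // VPROLD under write mask (AVX512F)
}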
// +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6952,6 +8046,8 @@ func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6959,6 +8055,8 @@ func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORD, CPU Feature: AVX512F @@ -6966,6 +8064,8 @@ func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -6973,6 +8073,8 @@ func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -6980,6 +8082,8 @@ func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPRORQ, CPU Feature: AVX512F @@ -7051,61 +8155,85 @@ func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVD, CPU Feature: AVX512F func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPROLVQ, CPU Feature: AVX512F func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -7175,61 +8303,85 @@ func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVD, CPU Feature: AVX512F func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVD, CPU Feature: AVX512F func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. 
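The non-"All" rotates instead take per-lane counts from a second vector. Sketch (hypothetical helper):

package example

import "simd"

// rolPerLane rotates each 64-bit lane of x left by the count in the
// corresponding lane of y, in the lanes selected by m.
func rolPerLane(x, y simd.Uint64x4, m simd.Mask64x4) simd.Uint64x4 {
	return x.RotateLeftMasked(y, m) // VPROLVQ under write mask (AVX512F)
}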
+// // Asm: VPRORVD, CPU Feature: AVX512F func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVD, CPU Feature: AVX512F func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVD, CPU Feature: AVX512F func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVD, CPU Feature: AVX512F func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. // +// This operation is applied selectively under a write mask. +// // Asm: VPRORVQ, CPU Feature: AVX512F func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 @@ -7303,6 +8455,8 @@ func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F @@ -7310,6 +8464,8 @@ func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
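+//
+// An illustrative sketch (an x Float32x8 and m Mask32x8, say, assumed in
+// scope; note that prec must be a constant):
+//
+//	r := x.RoundWithPrecisionMasked(2, m) // round selected lanes with constant precision 2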
// // Asm: VRNDSCALEPS, CPU Feature: AVX512F @@ -7317,6 +8473,8 @@ func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F @@ -7324,6 +8482,8 @@ func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F @@ -7331,6 +8491,8 @@ func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F @@ -7338,6 +8500,8 @@ func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // RoundWithPrecisionMasked rounds elements with specified precision. // +// This operation is applied selectively under a write mask. +// // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F @@ -7409,61 +8573,85 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. 
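+//
+// For example (x, y Uint8x32 and m Mask8x32 assumed in scope):
+//
+//	sum := x.SaturatedAddMasked(y, m) // selected lanes clamp at 255 instead of wrapping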
// +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSB, CPU Feature: AVX512BW func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16 // SaturatedAddMasked adds corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPADDSW, CPU Feature: AVX512BW func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32 @@ -7488,16 +8676,22 @@ func (x Int16x32) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x1 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI func (x Int16x8) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI func (x Int16x16) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI func (x Int16x32) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16 @@ -7595,61 +8789,85 @@ func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. 
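+//
+// For example (x, y Int16x16 and m Mask16x16 assumed in scope):
+//
+//	d := x.SaturatedSubMasked(y, m) // selected lanes clamp to [-32768, 32767] rather than wrapping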
// +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSB, CPU Feature: AVX512BW func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. // +// This operation is applied selectively under a write mask. +// // Asm: VPSUBSW, CPU Feature: AVX512BW func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32 @@ -7678,18 +8896,24 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask. +// // Asm: VPMADDUBSW, CPU Feature: AVX512BW func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask. +// // Asm: VPMADDUBSW, CPU Feature: AVX512BW func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // +// This operation is applied selectively under a write mask.
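+//
+// A short sketch (x Uint8x64, y Int8x64, and m Mask16x32 assumed in scope);
+// the result has half as many lanes, each twice as wide:
+//
+//	dp := x.SaturatedUnsignedSignedPairDotProdMasked(y, m) // Int16x32 of saturated pair sums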
+// // Asm: VPMADDUBSW, CPU Feature: AVX512BW func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32 @@ -7714,16 +8938,22 @@ func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int3 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // +// This operation is applied selectively under a write mask. +// // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 @@ -8100,6 +9330,8 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8108,6 +9340,8 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8116,6 +9350,8 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8124,6 +9360,8 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic.
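+//
+// An illustrative sketch (x, y Int32x4 and m Mask32x4, say, assumed in scope;
+// the shift count must be a constant):
+//
+//	r := x.ShiftAllLeftAndFillUpperFromMasked(3, y, m) // selected lanes shift left by 3, filled from y's upper bits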
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8132,6 +9370,8 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8140,6 +9380,8 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8148,6 +9390,8 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8156,6 +9400,8 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8164,6 +9410,8 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8172,6 +9420,8 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8180,6 +9430,8 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8188,6 +9440,8 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 @@ -8196,6 +9450,8 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8204,6 +9460,8 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8212,6 +9470,8 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 @@ -8220,6 +9480,8 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8228,6 +9490,8 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8236,6 +9500,8 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, ma // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 @@ -8245,91 +9511,127 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, ma // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. 
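+//
+// For example (x Int64x4, a count n uint64, and m Mask64x4 assumed in scope;
+// unlike the immediate forms, n need not be a constant):
+//
+//	r := x.ShiftAllLeftMasked(n, m) // selected lanes shift left by n bits, zero-filling the low bits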
+// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8 @@ -8576,6 +9878,8 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. 
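+//
+// A brief sketch (x, y Int16x8 and m Mask16x8 assumed in scope; the shift
+// count must be a constant):
+//
+//	r := x.ShiftAllRightAndFillUpperFromMasked(4, y, m) // selected lanes shift right by 4, filled from y's lower bits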
+// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8584,6 +9888,8 @@ func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8592,6 +9898,8 @@ func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8600,6 +9908,8 @@ func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8608,6 +9918,8 @@ func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8616,6 +9928,8 @@ func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8624,6 +9938,8 @@ func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8632,6 +9948,8 @@ func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8640,6 +9958,8 @@ func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8648,6 +9968,8 @@ func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mas // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8656,6 +9978,8 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8664,6 +9988,8 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 @@ -8672,6 +9998,8 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8680,6 +10008,8 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8688,6 +10018,8 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 @@ -8696,6 +10028,8 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8704,6 +10038,8 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8712,6 +10048,8 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, m // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 @@ -8721,91 +10059,127 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, m // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAW, CPU Feature: AVX512BW func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. 
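+//
+// For example (x Int16x16, a count n uint64, and m Mask16x16 assumed in scope):
+//
+//	r := x.ShiftAllRightMasked(n, m) // arithmetic shift: vacated upper bits take the sign bit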
+// // Asm: VPSRAW, CPU Feature: AVX512BW func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAW, CPU Feature: AVX512BW func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAD, CPU Feature: AVX512F func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAD, CPU Feature: AVX512F func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAD, CPU Feature: AVX512F func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLW, CPU Feature: AVX512BW func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLW, CPU Feature: AVX512BW func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLW, CPU Feature: AVX512BW func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. 
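+//
+// For example (x Uint32x4, a count n uint64, and m Mask32x4 assumed in scope);
+// contrast with the signed forms above, which fill with the sign bit:
+//
+//	r := x.ShiftAllRightMasked(n, m) // logical shift: vacated upper bits are zeroed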
+// // Asm: VPSRLD, CPU Feature: AVX512F func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLD, CPU Feature: AVX512F func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLD, CPU Feature: AVX512F func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLQ, CPU Feature: AVX512F func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLQ, CPU Feature: AVX512F func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSRLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8 @@ -9016,108 +10390,144 @@ func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. 
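+//
+// A brief sketch (x, y, z Int32x4 and m Mask32x4 assumed in scope); per-lane
+// counts come from y, fill bits from z:
+//
+//	r := x.ShiftLeftAndFillUpperFromMasked(y, z, m) // only lanes selected by m are written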
+// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. +// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // +// This operation is applied selectively under a write mask. 
+// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 @@ -9125,91 +10535,127 @@ func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask M // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVW, CPU Feature: AVX512BW func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVW, CPU Feature: AVX512BW func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVW, CPU Feature: AVX512BW func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVD, CPU Feature: AVX512F func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVD, CPU Feature: AVX512F func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVD, CPU Feature: AVX512F func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVQ, CPU Feature: AVX512F func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVQ, CPU Feature: AVX512F func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. +// // Asm: VPSLLVQ, CPU Feature: AVX512F func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // +// This operation is applied selectively under a write mask. 
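+//
+// For example (x, y Uint16x8 and m Mask16x8 assumed in scope):
+//
+//	r := x.ShiftLeftMasked(y, m) // selected lanes shift left by y's per-lane count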
// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
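Masks for these operations typically come from lane-wise comparisons. The following usage sketch is hypothetical: it assumes the package's comparison methods (a Greater method returning Mask32x4) and that the zero value of a vector type is the all-zeroes vector, neither of which this patch shows.

// doubleWherePositive shifts each positive lane of v left by one,
// i.e. doubles it; other lanes come back zeroed (assumed semantics).
// ones must hold the value 1 in every lane.
// (assumes: import "simd" on the dev.simd branch)
func doubleWherePositive(v, ones simd.Int32x4) simd.Int32x4 {
	var zero simd.Int32x4 // assumed to be the all-zeroes vector
	m := v.Greater(zero)  // Mask32x4: true where v > 0
	return v.ShiftLeftMasked(ones, m)
}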
@@ -9420,108 +10866,144 @@ func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
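The right-funnel counterpart, again as a scalar sketch of one 32-bit lane (invented helper name; mask zeroing assumed):

// shrdLane32 models one 32-bit lane of ShiftRightAndFillUpperFromMasked:
// x is shifted right and the vacated high bits are filled from the low
// bits of z; the shift count is taken modulo the lane width.
func shrdLane32(x, y, z uint32, m bool) uint32 {
	if !m {
		return 0 // assumed: unselected lanes are zeroed
	}
	s := y % 32
	if s == 0 {
		return x
	}
	return x>>s | z<<(32-s)
}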
// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4

// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8

@@ -9529,91 +11011,127 @@ func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVD, CPU Feature: AVX512F
func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVD, CPU Feature: AVX512F
func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVD, CPU Feature: AVX512F
func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVD, CPU Feature: AVX512F
func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVD, CPU Feature: AVX512F
func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVD, CPU Feature: AVX512F
func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
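Note the signed/unsigned asymmetry documented above: signed vectors lower to VPSRAV* (arithmetic, sign-filling) and unsigned vectors to VPSRLV* (logical, zero-filling). Per the Intel SDM, an out-of-range count saturates an arithmetic shift to all-sign-bits and zeroes a logical one. A scalar sketch of one signed 32-bit lane (invented name; mask zeroing assumed):

// sraLane32 models one 32-bit lane of the signed ShiftRightMasked.
func sraLane32(x, y int32, m bool) int32 {
	if !m {
		return 0 // assumed: unselected lanes are zeroed
	}
	s := uint32(y)
	if s > 31 {
		s = 31 // arithmetic shift: every bit becomes the sign bit
	}
	return x >> s
}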
// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVQ, CPU Feature: AVX512F
func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVQ, CPU Feature: AVX512F
func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4

// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSRLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8

@@ -9691,31 +11209,43 @@ func (x Float64x8) Sqrt() Float64x8

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPS, CPU Feature: AVX512F
func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPS, CPU Feature: AVX512F
func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPS, CPU Feature: AVX512F
func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPD, CPU Feature: AVX512F
func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPD, CPU Feature: AVX512F
func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4

// SqrtMasked computes the square root of each element.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSQRTPD, CPU Feature: AVX512F
func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8

@@ -9875,151 +11405,211 @@ func (x Uint64x8) Sub(y Uint64x8) Uint64x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPS, CPU Feature: AVX512F
func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPS, CPU Feature: AVX512F
func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPS, CPU Feature: AVX512F
func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPD, CPU Feature: AVX512F
func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPD, CPU Feature: AVX512F
func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4
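One practical use of the write mask, shown here against SqrtMasked above: suppress lanes that would otherwise produce NaN. This is a hypothetical sketch; the GreaterEqual method and the all-zeroes zero value are assumptions not established by this patch, and unselected lanes are assumed to come back zeroed.

// sqrtNonNegative takes the square root of the lanes of v that are
// >= 0 and (assumed) zeroes the rest, instead of producing NaN there.
// (assumes: import "simd" on the dev.simd branch)
func sqrtNonNegative(v simd.Float64x4) simd.Float64x4 {
	var zero simd.Float64x4   // assumed all-zeroes vector
	m := v.GreaterEqual(zero) // Mask64x4: true where v >= 0
	return v.SqrtMasked(m)
}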
// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VSUBPD, CPU Feature: AVX512F
func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBB, CPU Feature: AVX512BW
func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBW, CPU Feature: AVX512BW
func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBD, CPU Feature: AVX512F
func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4

// SubMasked subtracts corresponding elements of two vectors.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8

@@ -10093,6 +11683,8 @@ func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
@@ -10100,6 +11692,8 @@ func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
@@ -10107,6 +11701,8 @@ func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
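How prec interacts with truncation is easiest to state in scalar form. VRNDSCALEP* rounds x scaled by 2^prec to an integer and scales back, so truncating "with precision" keeps prec fractional bits; this reading is inferred from the instruction's definition rather than stated by this patch.

// truncWithPrec models TruncWithPrecision on one float64 lane:
// truncate toward zero at a granularity of 2^-prec.
// (assumes: import "math")
func truncWithPrec(x float64, prec uint8) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return math.Trunc(x*scale) / scale
}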
@@ -10114,6 +11710,8 @@ func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
@@ -10121,6 +11719,8 @@ func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
@@ -10128,6 +11728,8 @@ func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// TruncWithPrecisionMasked truncates elements with specified precision.
//
+// This operation is applied selectively under a write mask.
+//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
@@ -10154,16 +11756,22 @@ func (x Int8x64) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int8x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int8x32) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8

// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
+// This operation is applied selectively under a write mask.
+//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int8x64) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
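The quad dot product above is compact but dense, so here is one 32-bit lane in scalar form. The helper name is invented; per VPDPBUSD there is no intermediate saturation (unlike the VPDPBUSDS form), and zeroing of unselected lanes is assumed.

// quadDotLane models one 32-bit lane of
// UnsignedSignedQuadDotProdAccumulateMasked: four signed bytes of x
// are multiplied elementwise with four unsigned bytes of y, the four
// products are summed, and the sum is added to the accumulator lane z.
func quadDotLane(x [4]int8, y [4]uint8, z int32, m bool) int32 {
	if !m {
		return 0 // assumed: unselected lanes are zeroed
	}
	for i := range x {
		z += int32(x[i]) * int32(y[i])
	}
	return z
}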
@@ -10199,7 +11807,7 @@ func (x Int32x4) Xor(y Int32x4) Int32x4
// Asm: VPXOR, CPU Feature: AVX2
func (x Int32x8) Xor(y Int32x8) Int32x8

-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Int32x16) Xor(y Int32x16) Int32x16
@@ -10214,7 +11822,7 @@ func (x Int64x2) Xor(y Int64x2) Int64x2
// Asm: VPXOR, CPU Feature: AVX2
func (x Int64x4) Xor(y Int64x4) Int64x4

-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Int64x8) Xor(y Int64x8) Int64x8
@@ -10249,7 +11857,7 @@ func (x Uint32x4) Xor(y Uint32x4) Uint32x4
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint32x8) Xor(y Uint32x8) Uint32x8

-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Uint32x16) Xor(y Uint32x16) Uint32x16
@@ -10264,69 +11872,93 @@ func (x Uint64x2) Xor(y Uint64x2) Uint64x2
// Asm: VPXOR, CPU Feature: AVX2
func (x Uint64x4) Xor(y Uint64x4) Uint64x4

-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Uint64x8) Xor(y Uint64x8) Uint64x8

/* XorMasked */

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORD, CPU Feature: AVX512F
func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2
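The rewordings above also clarify the naming convention: Xor is unmasked, and only the *Masked variants take a write mask. In scalar terms (invented helper; zeroing of unselected lanes assumed):

// xorLane32 models one 32-bit lane of XorMasked.
func xorLane32(x, y int32, m bool) int32 {
	if !m {
		return 0 // assumed: unselected lanes are zeroed
	}
	return x ^ y
}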
-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4

-// XorMasked performs a masked bitwise XOR operation between two vectors.
+// XorMasked performs a bitwise XOR operation between two vectors.
+//
+// This operation is applied selectively under a write mask.
//
// Asm: VPXORQ, CPU Feature: AVX512F
func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8
-- 
2.52.0