From 55665e1e3756c0181f7572c8766749695ed1516a Mon Sep 17 00:00:00 2001
From: David Chase <drchase@google.com>
Date: Sat, 28 Jun 2025 10:20:53 -0400
Subject: [PATCH] [dev.simd] cmd/compile: undoes reorder transform in prior
 commit, changes names

paired with simdgen CL 684655

Change-Id: I819eb601c07b21747d8a1442eb1efbf9fa5aac1d
Reviewed-on: https://go-review.googlesource.com/c/go/+/684775
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
---
 .../compile/internal/ssagen/simdintrinsics.go |  44 +--
 src/simd/stubs_amd64.go                       | 304 +++++++++---------
 2 files changed, 164 insertions(+), 184 deletions(-)

diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index d14b6be425..87c1327f16 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -262,12 +262,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64)
 	addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
@@ -627,12 +627,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x64, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64)
+	addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64)
 	addF(simdPackage, "Uint8x16.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint8x32.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint8x64.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
@@ -2215,23 +2215,3 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
 }
-
-func opGaloisFieldAffineTransform(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-		if args[0].Op == ssa.OpConst8 {
-			return s.newValue2I(op, t, args[0].AuxInt, args[0], args[1])
-		}
-		plainPanicSimdImm(s)
-		return s.newValue2I(op, t, 0, args[0], args[1])
-	}
-}
-
-func opGaloisFieldAffineTransformMasked(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-		if args[0].Op == ssa.OpConst8 {
-			return s.newValue3I(op, t, args[0].AuxInt, args[0], args[1], args[3])
-		}
-		plainPanicSimdImm(s)
-		return s.newValue3I(op, t, 0, args[0], args[1], args[3])
-	}
-}
diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go
index f20a9b17ae..e589378c72 100644
--- a/src/simd/stubs_amd64.go
+++ b/src/simd/stubs_amd64.go
@@ -632,37 +632,37 @@ func (x Float64x4) Ceil() Float64x4
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) CeilSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) CeilSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) CeilSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) CeilSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) CeilSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) CeilSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* CeilWithPrecision */
 
@@ -670,37 +670,37 @@ func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) CeilWithPrecision(imm uint8) Float32x4
 
 // CeilWithPrecision rounds elements up with specified precision, masked.
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) CeilWithPrecision(imm uint8) Float32x8
 
 // CeilWithPrecision rounds elements up with specified precision, masked.
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) CeilWithPrecision(imm uint8) Float32x16
 
 // CeilWithPrecision rounds elements up with specified precision, masked.
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) CeilWithPrecision(imm uint8) Float64x2
 
 // CeilWithPrecision rounds elements up with specified precision, masked.
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) CeilWithPrecision(imm uint8) Float64x4
 
 // CeilWithPrecision rounds elements up with specified precision, masked.
 // Const Immediate = 2.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) CeilWithPrecision(imm uint8) Float64x8
 
 /* DiffWithCeilSuppressExceptionWithPrecision */
 
@@ -708,37 +708,37 @@ func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions.
 // Const Immediate = 10.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* DiffWithCeilWithPrecision */
 
@@ -746,37 +746,37 @@ func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float6
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithCeilWithPrecision(imm uint8) Float32x4
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithCeilWithPrecision(imm uint8) Float32x8
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithCeilWithPrecision(imm uint8) Float32x16
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithCeilWithPrecision(imm uint8) Float64x2
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithCeilWithPrecision(imm uint8) Float64x4
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 // Const Immediate = 2.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithCeilWithPrecision(imm uint8) Float64x8
 
 /* DiffWithFloorSuppressExceptionWithPrecision */
 
@@ -784,37 +784,37 @@ func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions.
 // Const Immediate = 9.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* DiffWithFloorWithPrecision */
 
@@ -822,37 +822,37 @@ func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithFloorWithPrecision(imm uint8) Float32x4
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithFloorWithPrecision(imm uint8) Float32x8
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithFloorWithPrecision(imm uint8) Float32x16
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithFloorWithPrecision(imm uint8) Float64x2
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithFloorWithPrecision(imm uint8) Float64x4
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 // Const Immediate = 1.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithFloorWithPrecision(imm uint8) Float64x8
 
 /* DiffWithRoundSuppressExceptionWithPrecision */
 
@@ -860,37 +860,37 @@ func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* DiffWithRoundWithPrecision */
 
@@ -898,37 +898,37 @@ func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithRoundWithPrecision(imm uint8) Float32x4
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithRoundWithPrecision(imm uint8) Float32x8
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithRoundWithPrecision(imm uint8) Float32x16
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithRoundWithPrecision(imm uint8) Float64x2
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithRoundWithPrecision(imm uint8) Float64x4
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithRoundWithPrecision(imm uint8) Float64x8
 
 /* DiffWithTruncSuppressExceptionWithPrecision */
 
@@ -936,37 +936,37 @@ func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* DiffWithTruncWithPrecision */
 
@@ -974,37 +974,37 @@ func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) DiffWithTruncWithPrecision(imm uint8) Float32x4
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) DiffWithTruncWithPrecision(imm uint8) Float32x8
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) DiffWithTruncWithPrecision(imm uint8) Float32x16
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) DiffWithTruncWithPrecision(imm uint8) Float64x2
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) DiffWithTruncWithPrecision(imm uint8) Float64x4
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) DiffWithTruncWithPrecision(imm uint8) Float64x8
 
 /* Div */
 
@@ -1260,37 +1260,37 @@ func (x Float64x4) Floor() Float64x4
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) FloorSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) FloorSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) FloorSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) FloorSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) FloorSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked.
 // Const Immediate = 9.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) FloorSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* FloorWithPrecision */
 
@@ -1298,37 +1298,37 @@ func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) FloorWithPrecision(imm uint8) Float32x4
 
 // FloorWithPrecision rounds elements down with specified precision, masked.
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) FloorWithPrecision(imm uint8) Float32x8
 
 // FloorWithPrecision rounds elements down with specified precision, masked.
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) FloorWithPrecision(imm uint8) Float32x16
 
 // FloorWithPrecision rounds elements down with specified precision, masked.
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) FloorWithPrecision(imm uint8) Float64x2
 
 // FloorWithPrecision rounds elements down with specified precision, masked.
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) FloorWithPrecision(imm uint8) Float64x4
 
 // FloorWithPrecision rounds elements down with specified precision, masked.
 // Const Immediate = 1.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) FloorWithPrecision(imm uint8) Float64x8
 
 /* FusedMultiplyAdd */
 
@@ -1430,56 +1430,56 @@ func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16
+func (x Uint8x16) GaloisFieldAffineTransform(b uint8, y Uint64x2) Uint8x16
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32
+func (x Uint8x32) GaloisFieldAffineTransform(b uint8, y Uint64x4) Uint8x32
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64
+func (x Uint8x64) GaloisFieldAffineTransform(b uint8, y Uint64x8) Uint8x64
 
 /* GaloisFieldAffineTransformInversed */
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x16) GaloisFieldAffineTransformInversed(y Uint64x2, b uint8) Uint8x16
+func (x Uint8x16) GaloisFieldAffineTransformInversed(b uint8, y Uint64x2) Uint8x16
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x32) GaloisFieldAffineTransformInversed(y Uint64x4, b uint8) Uint8x32
+func (x Uint8x32) GaloisFieldAffineTransformInversed(b uint8, y Uint64x4) Uint8x32
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x64) GaloisFieldAffineTransformInversed(y Uint64x8, b uint8) Uint8x64
+func (x Uint8x64) GaloisFieldAffineTransformInversed(b uint8, y Uint64x8) Uint8x64
 
 /* GaloisFieldMul */
 
@@ -1506,42 +1506,42 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRB, CPU Feature: AVX512EVEX
-func (x Int8x16) GetElem(imm8 uint8) int8
+func (x Int8x16) GetElem(imm uint8) int8
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRW, CPU Feature: AVX512EVEX
-func (x Int16x8) GetElem(imm8 uint8) int16
+func (x Int16x8) GetElem(imm uint8) int16
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRD, CPU Feature: AVX
-func (x Int32x4) GetElem(imm8 uint8) int32
+func (x Int32x4) GetElem(imm uint8) int32
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRQ, CPU Feature: AVX
-func (x Int64x2) GetElem(imm8 uint8) int64
+func (x Int64x2) GetElem(imm uint8) int64
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRB, CPU Feature: AVX512EVEX
-func (x Uint8x16) GetElem(imm8 uint8) uint8
+func (x Uint8x16) GetElem(imm uint8) uint8
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRW, CPU Feature: AVX512EVEX
-func (x Uint16x8) GetElem(imm8 uint8) uint16
+func (x Uint16x8) GetElem(imm uint8) uint16
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRD, CPU Feature: AVX
-func (x Uint32x4) GetElem(imm8 uint8) uint32
+func (x Uint32x4) GetElem(imm uint8) uint32
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // Asm: VPEXTRQ, CPU Feature: AVX
-func (x Uint64x2) GetElem(imm8 uint8) uint64
+func (x Uint64x2) GetElem(imm uint8) uint64
 
 /* Greater */
 
@@ -3573,56 +3573,56 @@ func (x Float64x8) MaskedFusedMultiplySubAdd(y Float64x8, z Float64x8, u Mask64x
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedGaloisFieldAffineTransform(y Uint64x2, b uint8, m Mask8x16) Uint8x16
+func (x Uint8x16) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x2, m Mask8x16) Uint8x16
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedGaloisFieldAffineTransform(y Uint64x4, b uint8, m Mask8x32) Uint8x32
+func (x Uint8x32) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x4, m Mask8x32) Uint8x32
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedGaloisFieldAffineTransform(y Uint64x8, b uint8, m Mask8x64) Uint8x64
+func (x Uint8x64) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x8, m Mask8x64) Uint8x64
 
 /* MaskedGaloisFieldAffineTransformInversed */
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x16) MaskedGaloisFieldAffineTransformInversed(y Uint64x2, b uint8, m Mask8x16) Uint8x16
+func (x Uint8x16) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x2, m Mask8x16) Uint8x16
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x32) MaskedGaloisFieldAffineTransformInversed(y Uint64x4, b uint8, m Mask8x32) Uint8x32
+func (x Uint8x32) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x4, m Mask8x32) Uint8x32
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8),
 // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1:
 // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
+// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
 // corresponding to a group of 8 elements in x.
 //
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
-func (x Uint8x64) MaskedGaloisFieldAffineTransformInversed(y Uint64x8, b uint8, m Mask8x64) Uint8x64
+func (x Uint8x64) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x8, m Mask8x64) Uint8x64
 
 /* MaskedGaloisFieldMul */
 
@@ -8161,124 +8161,124 @@ func (x Uint64x8) PopCount() Uint64x8
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Int32x4) RotateAllLeft(imm8 uint8) Int32x4
+func (x Int32x4) RotateAllLeft(imm uint8) Int32x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Int32x8) RotateAllLeft(imm8 uint8) Int32x8
+func (x Int32x8) RotateAllLeft(imm uint8) Int32x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Int32x16) RotateAllLeft(imm8 uint8) Int32x16
+func (x Int32x16) RotateAllLeft(imm uint8) Int32x16
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Int64x2) RotateAllLeft(imm8 uint8) Int64x2
+func (x Int64x2) RotateAllLeft(imm uint8) Int64x2
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Int64x4) RotateAllLeft(imm8 uint8) Int64x4
+func (x Int64x4) RotateAllLeft(imm uint8) Int64x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Int64x8) RotateAllLeft(imm8 uint8) Int64x8
+func (x Int64x8) RotateAllLeft(imm uint8) Int64x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Uint32x4) RotateAllLeft(imm8 uint8) Uint32x4
+func (x Uint32x4) RotateAllLeft(imm uint8) Uint32x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Uint32x8) RotateAllLeft(imm8 uint8) Uint32x8
+func (x Uint32x8) RotateAllLeft(imm uint8) Uint32x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLD, CPU Feature: AVX512EVEX
-func (x Uint32x16) RotateAllLeft(imm8 uint8) Uint32x16
+func (x Uint32x16) RotateAllLeft(imm uint8) Uint32x16
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) RotateAllLeft(imm8 uint8) Uint64x2
+func (x Uint64x2) RotateAllLeft(imm uint8) Uint64x2
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) RotateAllLeft(imm8 uint8) Uint64x4
+func (x Uint64x4) RotateAllLeft(imm uint8) Uint64x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // Asm: VPROLQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) RotateAllLeft(imm8 uint8) Uint64x8
+func (x Uint64x8) RotateAllLeft(imm uint8) Uint64x8
 
 /* RotateAllRight */
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Int32x4) RotateAllRight(imm8 uint8) Int32x4
+func (x Int32x4) RotateAllRight(imm uint8) Int32x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Int32x8) RotateAllRight(imm8 uint8) Int32x8
+func (x Int32x8) RotateAllRight(imm uint8) Int32x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Int32x16) RotateAllRight(imm8 uint8) Int32x16
+func (x Int32x16) RotateAllRight(imm uint8) Int32x16
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Int64x2) RotateAllRight(imm8 uint8) Int64x2
+func (x Int64x2) RotateAllRight(imm uint8) Int64x2
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Int64x4) RotateAllRight(imm8 uint8) Int64x4
+func (x Int64x4) RotateAllRight(imm uint8) Int64x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Int64x8) RotateAllRight(imm8 uint8) Int64x8
+func (x Int64x8) RotateAllRight(imm uint8) Int64x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Uint32x4) RotateAllRight(imm8 uint8) Uint32x4
+func (x Uint32x4) RotateAllRight(imm uint8) Uint32x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Uint32x8) RotateAllRight(imm8 uint8) Uint32x8
+func (x Uint32x8) RotateAllRight(imm uint8) Uint32x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORD, CPU Feature: AVX512EVEX
-func (x Uint32x16) RotateAllRight(imm8 uint8) Uint32x16
+func (x Uint32x16) RotateAllRight(imm uint8) Uint32x16
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) RotateAllRight(imm8 uint8) Uint64x2
+func (x Uint64x2) RotateAllRight(imm uint8) Uint64x2
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) RotateAllRight(imm8 uint8) Uint64x4
+func (x Uint64x4) RotateAllRight(imm uint8) Uint64x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // Asm: VPRORQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) RotateAllRight(imm8 uint8) Uint64x8
+func (x Uint64x8) RotateAllRight(imm uint8) Uint64x8
 
 /* RotateLeft */
 
@@ -8436,37 +8436,37 @@ func (x Float64x4) Round() Float64x4
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) RoundSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) RoundSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) RoundSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) RoundSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) RoundSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
 // Const Immediate = 8.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) RoundSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* RoundWithPrecision */
 
@@ -8474,37 +8474,37 @@ func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) RoundWithPrecision(imm uint8) Float32x4
 
 // RoundWithPrecision rounds elements with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) RoundWithPrecision(imm uint8) Float32x8
 
 // RoundWithPrecision rounds elements with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) RoundWithPrecision(imm uint8) Float32x16
 
 // RoundWithPrecision rounds elements with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) RoundWithPrecision(imm uint8) Float64x2
 
 // RoundWithPrecision rounds elements with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) RoundWithPrecision(imm uint8) Float64x4
 
 // RoundWithPrecision rounds elements with specified precision.
 // Const Immediate = 0.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) RoundWithPrecision(imm uint8) Float64x8
 
 /* SaturatedAdd */
 
@@ -9920,37 +9920,37 @@ func (x Float64x4) Trunc() Float64x4
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) TruncSuppressExceptionWithPrecision(imm uint8) Float32x4
 
 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) TruncSuppressExceptionWithPrecision(imm uint8) Float32x8
 
 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) TruncSuppressExceptionWithPrecision(imm uint8) Float32x16
 
 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) TruncSuppressExceptionWithPrecision(imm uint8) Float64x2
 
 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) TruncSuppressExceptionWithPrecision(imm uint8) Float64x4
 
 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions.
 // Const Immediate = 11.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) TruncSuppressExceptionWithPrecision(imm uint8) Float64x8
 
 /* TruncWithPrecision */
 
@@ -9958,37 +9958,37 @@ func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4
+func (x Float32x4) TruncWithPrecision(imm uint8) Float32x4
 
 // TruncWithPrecision truncates elements with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8
+func (x Float32x8) TruncWithPrecision(imm uint8) Float32x8
 
 // TruncWithPrecision truncates elements with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
-func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16
+func (x Float32x16) TruncWithPrecision(imm uint8) Float32x16
 
 // TruncWithPrecision truncates elements with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2
+func (x Float64x2) TruncWithPrecision(imm uint8) Float64x2
 
 // TruncWithPrecision truncates elements with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4
+func (x Float64x4) TruncWithPrecision(imm uint8) Float64x4
 
 // TruncWithPrecision truncates elements with specified precision.
 // Const Immediate = 3.
 //
 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
-func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+func (x Float64x8) TruncWithPrecision(imm uint8) Float64x8
 
 /* UnsignedSignedQuadDotProdAccumulate */
 
-- 
2.52.0