From 55665e1e3756c0181f7572c8766749695ed1516a Mon Sep 17 00:00:00 2001 From: David Chase Date: Sat, 28 Jun 2025 10:20:53 -0400 Subject: [PATCH] [dev.simd] cmd/compile: undoes reorder transform in prior commit, changes names paired with simdgen CL 684655 Change-Id: I819eb601c07b21747d8a1442eb1efbf9fa5aac1d Reviewed-on: https://go-review.googlesource.com/c/go/+/684775 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Cherry Mui --- .../compile/internal/ssagen/simdintrinsics.go | 44 +-- src/simd/stubs_amd64.go | 304 +++++++++--------- 2 files changed, 164 insertions(+), 184 deletions(-) diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index d14b6be425..87c1327f16 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -262,12 +262,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128), 
sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversed", opGaloisFieldAffineTransform(ssa.OpGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversed", opLen2Imm8(ssa.OpGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) @@ -627,12 +627,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransform", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransformInversed", opGaloisFieldAffineTransformMasked(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransform", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64) + 
addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) @@ -2215,23 +2215,3 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) } - -func opGaloisFieldAffineTransform(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if args[0].Op == ssa.OpConst8 { - return s.newValue2I(op, t, args[0].AuxInt, args[0], args[1]) - } - plainPanicSimdImm(s) - return s.newValue2I(op, t, 0, args[0], args[1]) - } -} - -func opGaloisFieldAffineTransformMasked(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if args[0].Op == ssa.OpConst8 { - return s.newValue3I(op, t, args[0].AuxInt, args[0], args[1], args[3]) - } - plainPanicSimdImm(s) - return s.newValue3I(op, t, 0, args[0], args[1], args[3]) - } -} diff --git a/src/simd/stubs_amd64.go 
b/src/simd/stubs_amd64.go index f20a9b17ae..e589378c72 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -632,37 +632,37 @@ func (x Float64x4) Ceil() Float64x4 // Const Immediate = 10. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) CeilSuppressExceptionWithPrecision(imm uint8) Float32x4 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) CeilSuppressExceptionWithPrecision(imm uint8) Float32x8 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) CeilSuppressExceptionWithPrecision(imm uint8) Float32x16 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) CeilSuppressExceptionWithPrecision(imm uint8) Float64x2 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) CeilSuppressExceptionWithPrecision(imm uint8) Float64x4 // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. 
// // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) CeilSuppressExceptionWithPrecision(imm uint8) Float64x8 /* CeilWithPrecision */ @@ -670,37 +670,37 @@ func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 2. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) CeilWithPrecision(imm uint8) Float32x4 // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) CeilWithPrecision(imm uint8) Float32x8 // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) CeilWithPrecision(imm uint8) Float32x16 // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) CeilWithPrecision(imm uint8) Float64x2 // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) CeilWithPrecision(imm uint8) Float64x4 // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) CeilWithPrecision(imm uint8) Float64x8 /* DiffWithCeilSuppressExceptionWithPrecision */ @@ -708,37 +708,37 @@ func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 10. 
// // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x4 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x8 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float32x16 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x2 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x4 // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. 
// // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm uint8) Float64x8 /* DiffWithCeilWithPrecision */ @@ -746,37 +746,37 @@ func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float6 // Const Immediate = 2. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithCeilWithPrecision(imm uint8) Float32x4 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithCeilWithPrecision(imm uint8) Float32x8 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithCeilWithPrecision(imm uint8) Float32x16 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithCeilWithPrecision(imm uint8) Float64x2 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithCeilWithPrecision(imm uint8) Float64x4 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. 
// // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithCeilWithPrecision(imm uint8) Float64x8 /* DiffWithFloorSuppressExceptionWithPrecision */ @@ -784,37 +784,37 @@ func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 9. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x4 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x8 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float32x16 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x2 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. 
// // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x4 // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm uint8) Float64x8 /* DiffWithFloorWithPrecision */ @@ -822,37 +822,37 @@ func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float // Const Immediate = 1. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithFloorWithPrecision(imm uint8) Float32x4 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithFloorWithPrecision(imm uint8) Float32x8 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithFloorWithPrecision(imm uint8) Float32x16 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithFloorWithPrecision(imm uint8) Float64x2 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. 
// // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithFloorWithPrecision(imm uint8) Float64x4 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithFloorWithPrecision(imm uint8) Float64x8 /* DiffWithRoundSuppressExceptionWithPrecision */ @@ -860,37 +860,37 @@ func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 8. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x4 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x8 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float32x16 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. 
// // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x2 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x4 // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm uint8) Float64x8 /* DiffWithRoundWithPrecision */ @@ -898,37 +898,37 @@ func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float // Const Immediate = 0. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithRoundWithPrecision(imm uint8) Float32x4 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithRoundWithPrecision(imm uint8) Float32x8 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. 
// // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithRoundWithPrecision(imm uint8) Float32x16 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithRoundWithPrecision(imm uint8) Float64x2 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithRoundWithPrecision(imm uint8) Float64x4 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithRoundWithPrecision(imm uint8) Float64x8 /* DiffWithTruncSuppressExceptionWithPrecision */ @@ -936,37 +936,37 @@ func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 11. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x4 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x8 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. 
// Const Immediate = 11. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float32x16 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x2 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x4 // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm uint8) Float64x8 /* DiffWithTruncWithPrecision */ @@ -974,37 +974,37 @@ func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float // Const Immediate = 3. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) DiffWithTruncWithPrecision(imm uint8) Float32x4 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. 
// // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) DiffWithTruncWithPrecision(imm uint8) Float32x8 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // // Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) DiffWithTruncWithPrecision(imm uint8) Float32x16 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) DiffWithTruncWithPrecision(imm uint8) Float64x2 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) DiffWithTruncWithPrecision(imm uint8) Float64x4 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // // Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) DiffWithTruncWithPrecision(imm uint8) Float64x8 /* Div */ @@ -1260,37 +1260,37 @@ func (x Float64x4) Floor() Float64x4 // Const Immediate = 9. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) FloorSuppressExceptionWithPrecision(imm uint8) Float32x4 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. 
// // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) FloorSuppressExceptionWithPrecision(imm uint8) Float32x8 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) FloorSuppressExceptionWithPrecision(imm uint8) Float32x16 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) FloorSuppressExceptionWithPrecision(imm uint8) Float64x2 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) FloorSuppressExceptionWithPrecision(imm uint8) Float64x4 // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) FloorSuppressExceptionWithPrecision(imm uint8) Float64x8 /* FloorWithPrecision */ @@ -1298,37 +1298,37 @@ func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 1. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) FloorWithPrecision(imm uint8) Float32x4 // FloorWithPrecision rounds elements down with specified precision, masked. 
// Const Immediate = 1. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) FloorWithPrecision(imm uint8) Float32x8 // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) FloorWithPrecision(imm uint8) Float32x16 // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) FloorWithPrecision(imm uint8) Float64x2 // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) FloorWithPrecision(imm uint8) Float64x4 // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) FloorWithPrecision(imm uint8) Float64x8 /* FusedMultiplyAdd */ @@ -1430,56 +1430,56 @@ func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x.
// // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16 +func (x Uint8x16) GaloisFieldAffineTransform(b uint8, y Uint64x2) Uint8x16 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32 +func (x Uint8x32) GaloisFieldAffineTransform(b uint8, y Uint64x4) Uint8x32 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64 +func (x Uint8x64) GaloisFieldAffineTransform(b uint8, y Uint64x8) Uint8x64 /* GaloisFieldAffineTransformInversed */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. 
// // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x16) GaloisFieldAffineTransformInversed(y Uint64x2, b uint8) Uint8x16 +func (x Uint8x16) GaloisFieldAffineTransformInversed(b uint8, y Uint64x2) Uint8x16 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x32) GaloisFieldAffineTransformInversed(y Uint64x4, b uint8) Uint8x32 +func (x Uint8x32) GaloisFieldAffineTransformInversed(b uint8, y Uint64x4) Uint8x32 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x64) GaloisFieldAffineTransformInversed(y Uint64x8, b uint8) Uint8x64 +func (x Uint8x64) GaloisFieldAffineTransformInversed(b uint8, y Uint64x8) Uint8x64 /* GaloisFieldMul */ @@ -1506,42 +1506,42 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRB, CPU Feature: AVX512EVEX -func (x Int8x16) GetElem(imm8 uint8) int8 +func (x Int8x16) GetElem(imm uint8) int8 // GetElem retrieves a single constant-indexed element's value. 
// // Asm: VPEXTRW, CPU Feature: AVX512EVEX -func (x Int16x8) GetElem(imm8 uint8) int16 +func (x Int16x8) GetElem(imm uint8) int16 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRD, CPU Feature: AVX -func (x Int32x4) GetElem(imm8 uint8) int32 +func (x Int32x4) GetElem(imm uint8) int32 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRQ, CPU Feature: AVX -func (x Int64x2) GetElem(imm8 uint8) int64 +func (x Int64x2) GetElem(imm uint8) int64 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRB, CPU Feature: AVX512EVEX -func (x Uint8x16) GetElem(imm8 uint8) uint8 +func (x Uint8x16) GetElem(imm uint8) uint8 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRW, CPU Feature: AVX512EVEX -func (x Uint16x8) GetElem(imm8 uint8) uint16 +func (x Uint16x8) GetElem(imm uint8) uint16 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRD, CPU Feature: AVX -func (x Uint32x4) GetElem(imm8 uint8) uint32 +func (x Uint32x4) GetElem(imm uint8) uint32 // GetElem retrieves a single constant-indexed element's value. // // Asm: VPEXTRQ, CPU Feature: AVX -func (x Uint64x2) GetElem(imm8 uint8) uint64 +func (x Uint64x2) GetElem(imm uint8) uint64 /* Greater */ @@ -3573,56 +3573,56 @@ func (x Float64x8) MaskedFusedMultiplySubAdd(y Float64x8, z Float64x8, u Mask64x // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. 
// // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGaloisFieldAffineTransform(y Uint64x2, b uint8, m Mask8x16) Uint8x16 +func (x Uint8x16) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x2, m Mask8x16) Uint8x16 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGaloisFieldAffineTransform(y Uint64x4, b uint8, m Mask8x32) Uint8x32 +func (x Uint8x32) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x4, m Mask8x32) Uint8x32 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGaloisFieldAffineTransform(y Uint64x8, b uint8, m Mask8x64) Uint8x64 +func (x Uint8x64) MaskedGaloisFieldAffineTransform(b uint8, y Uint64x8, m Mask8x64) Uint8x64 /* MaskedGaloisFieldAffineTransformInversed */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. 
The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGaloisFieldAffineTransformInversed(y Uint64x2, b uint8, m Mask8x16) Uint8x16 +func (x Uint8x16) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x2, m Mask8x16) Uint8x16 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGaloisFieldAffineTransformInversed(y Uint64x4, b uint8, m Mask8x32) Uint8x32 +func (x Uint8x32) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x4, m Mask8x32) Uint8x32 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y // corresponding to a group of 8 elements in x. 
// // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGaloisFieldAffineTransformInversed(y Uint64x8, b uint8, m Mask8x64) Uint8x64 +func (x Uint8x64) MaskedGaloisFieldAffineTransformInversed(b uint8, y Uint64x8, m Mask8x64) Uint8x64 /* MaskedGaloisFieldMul */ @@ -8161,124 +8161,124 @@ func (x Uint64x8) PopCount() Uint64x8 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x4) RotateAllLeft(imm8 uint8) Int32x4 +func (x Int32x4) RotateAllLeft(imm uint8) Int32x4 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x8) RotateAllLeft(imm8 uint8) Int32x8 +func (x Int32x8) RotateAllLeft(imm uint8) Int32x8 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x16) RotateAllLeft(imm8 uint8) Int32x16 +func (x Int32x16) RotateAllLeft(imm uint8) Int32x16 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x2) RotateAllLeft(imm8 uint8) Int64x2 +func (x Int64x2) RotateAllLeft(imm uint8) Int64x2 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x4) RotateAllLeft(imm8 uint8) Int64x4 +func (x Int64x4) RotateAllLeft(imm uint8) Int64x4 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x8) RotateAllLeft(imm8 uint8) Int64x8 +func (x Int64x8) RotateAllLeft(imm uint8) Int64x8 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
// // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x4) RotateAllLeft(imm8 uint8) Uint32x4 +func (x Uint32x4) RotateAllLeft(imm uint8) Uint32x4 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x8) RotateAllLeft(imm8 uint8) Uint32x8 +func (x Uint32x8) RotateAllLeft(imm uint8) Uint32x8 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x16) RotateAllLeft(imm8 uint8) Uint32x16 +func (x Uint32x16) RotateAllLeft(imm uint8) Uint32x16 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) RotateAllLeft(imm8 uint8) Uint64x2 +func (x Uint64x2) RotateAllLeft(imm uint8) Uint64x2 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) RotateAllLeft(imm8 uint8) Uint64x4 +func (x Uint64x4) RotateAllLeft(imm uint8) Uint64x4 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. // // Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) RotateAllLeft(imm8 uint8) Uint64x8 +func (x Uint64x8) RotateAllLeft(imm uint8) Uint64x8 /* RotateAllRight */ // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x4) RotateAllRight(imm8 uint8) Int32x4 +func (x Int32x4) RotateAllRight(imm uint8) Int32x4 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
// // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x8) RotateAllRight(imm8 uint8) Int32x8 +func (x Int32x8) RotateAllRight(imm uint8) Int32x8 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x16) RotateAllRight(imm8 uint8) Int32x16 +func (x Int32x16) RotateAllRight(imm uint8) Int32x16 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x2) RotateAllRight(imm8 uint8) Int64x2 +func (x Int64x2) RotateAllRight(imm uint8) Int64x2 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x4) RotateAllRight(imm8 uint8) Int64x4 +func (x Int64x4) RotateAllRight(imm uint8) Int64x4 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x8) RotateAllRight(imm8 uint8) Int64x8 +func (x Int64x8) RotateAllRight(imm uint8) Int64x8 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x4) RotateAllRight(imm8 uint8) Uint32x4 +func (x Uint32x4) RotateAllRight(imm uint8) Uint32x4 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x8) RotateAllRight(imm8 uint8) Uint32x8 +func (x Uint32x8) RotateAllRight(imm uint8) Uint32x8 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
// // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x16) RotateAllRight(imm8 uint8) Uint32x16 +func (x Uint32x16) RotateAllRight(imm uint8) Uint32x16 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) RotateAllRight(imm8 uint8) Uint64x2 +func (x Uint64x2) RotateAllRight(imm uint8) Uint64x2 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) RotateAllRight(imm8 uint8) Uint64x4 +func (x Uint64x4) RotateAllRight(imm uint8) Uint64x4 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) RotateAllRight(imm8 uint8) Uint64x8 +func (x Uint64x8) RotateAllRight(imm uint8) Uint64x8 /* RotateLeft */ @@ -8436,37 +8436,37 @@ func (x Float64x4) Round() Float64x4 // Const Immediate = 8. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) RoundSuppressExceptionWithPrecision(imm uint8) Float32x4 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) RoundSuppressExceptionWithPrecision(imm uint8) Float32x8 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) RoundSuppressExceptionWithPrecision(imm uint8) Float32x16 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. 
// Const Immediate = 8. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) RoundSuppressExceptionWithPrecision(imm uint8) Float64x2 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) RoundSuppressExceptionWithPrecision(imm uint8) Float64x4 // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. // Const Immediate = 8. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) RoundSuppressExceptionWithPrecision(imm uint8) Float64x8 /* RoundWithPrecision */ @@ -8474,37 +8474,37 @@ func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 0. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) RoundWithPrecision(imm uint8) Float32x4 // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) RoundWithPrecision(imm uint8) Float32x8 // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) RoundWithPrecision(imm uint8) Float32x16 // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. 
// // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) RoundWithPrecision(imm uint8) Float64x2 // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) RoundWithPrecision(imm uint8) Float64x4 // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) RoundWithPrecision(imm uint8) Float64x8 /* SaturatedAdd */ @@ -9920,37 +9920,37 @@ func (x Float64x4) Trunc() Float64x4 // Const Immediate = 11. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) TruncSuppressExceptionWithPrecision(imm uint8) Float32x4 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) TruncSuppressExceptionWithPrecision(imm uint8) Float32x8 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) TruncSuppressExceptionWithPrecision(imm uint8) Float32x16 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. 
// // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) TruncSuppressExceptionWithPrecision(imm uint8) Float64x2 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) TruncSuppressExceptionWithPrecision(imm uint8) Float64x4 // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) TruncSuppressExceptionWithPrecision(imm uint8) Float64x8 /* TruncWithPrecision */ @@ -9958,37 +9958,37 @@ func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 // Const Immediate = 3. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4 +func (x Float32x4) TruncWithPrecision(imm uint8) Float32x4 // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8 +func (x Float32x8) TruncWithPrecision(imm uint8) Float32x8 // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. // // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16 +func (x Float32x16) TruncWithPrecision(imm uint8) Float32x16 // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. 
// // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2 +func (x Float64x2) TruncWithPrecision(imm uint8) Float64x2 // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4 +func (x Float64x4) TruncWithPrecision(imm uint8) Float64x4 // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. // // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8 +func (x Float64x8) TruncWithPrecision(imm uint8) Float64x8 /* UnsignedSignedQuadDotProdAccumulate */ -- 2.52.0